Descripción:
La fuente de datos estudiada son los informes mensuales que entrega el sistema Rydoo. Se estudian 10 meses, desde el 01/01/2022 hasta el 31/10/2022, con un total de 17.935 registros con código chileno.
Preprocesado
#Librerias
import numpy as np
from plotnine import *
from decimal import *
import pandas as pd
# Render every float with nine decimal places when DataFrames are displayed
pd.set_option('display.float_format', lambda x: '%.9f' % x)

# Load data: read each workbook under data/ and keep only the columns
# listed in vars_name (note that most source headers end with a space).
files = ['reporte_1-2022-10-2022.xlsx']
vars_name = [
    'Cantidad ',
    'Fecha de la transacción ',
    'Fecha de aprobación ',
    'Fecha de finalización ',
    'Trabajador ',
    'Fondo fijo',
    'Categoría ',
    'Proveedor ',
    'Proyecto ',
    'Tipo Documento',
    'Sucursal ',
]
df = pd.concat([pd.read_excel("data/" + name)[vars_name] for name in files])
# Add date-derived columns used later for grouping and plotting
df['Fecha_de_la_transacción_month'] = df['Fecha de la transacción '].dt.month
df['Fecha_de_la_transacción_year'] = df['Fecha de la transacción '].dt.year
df['Fecha_de_la_transacción_day'] = df['Fecha de la transacción '].dt.day
df['Fecha_de_la_transacción_day_name'] = df['Fecha de la transacción '].dt.day_name()
df['Fecha_de_la_transacción_year_month'] = df['Fecha de la transacción '].dt.strftime('%Y-%m')
# Year-month-day label built from the integer parts, so month and day are
# not zero-padded.
df['Fecha_de_la_transacción_year_month_day'] = (
    df['Fecha_de_la_transacción_year'].astype("string")
    + '-' + df['Fecha_de_la_transacción_month'].astype("string")
    + '-' + df['Fecha_de_la_transacción_day'].astype("string")
)

# Time elapsed between the transaction and its approval, also as whole days
df['delta_Fecha_aprobación_transacción'] = df['Fecha de aprobación '] - df['Fecha de la transacción ']
df['delta_Fecha_aprobación_transacción_day_int'] = df['delta_Fecha_aprobación_transacción'].dt.days

# Month labels become a categorical whose categories are in chronological
# order. The labels are parsed back to datetimes, sorted, and formatted again.
# The previous code passed a column name positionally to sort_values (where it
# was read as a truthy `return_indexer`) and then indexed the returned tuple.
# dropna() keeps a missing transaction date from producing a null category.
categories = (
    pd.to_datetime(df['Fecha_de_la_transacción_year_month'].dropna().unique(), format='%Y-%m')
    .sort_values()
    .strftime('%Y-%m')
)
df['Fecha_de_la_transacción_year_month'] = pd.Categorical(
    df['Fecha_de_la_transacción_year_month'], categories=categories, ordered=False
)

# Weekday names become a categorical listed Monday through Sunday
categories = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['Fecha_de_la_transacción_day_name'] = pd.Categorical(
    df['Fecha_de_la_transacción_day_name'], categories=categories, ordered=False
)
# Filter by document type.
# Values seen in the raw data (output of df['Tipo Documento'].unique()):
#   ['CHL - Documento Exento', 'CHL - Boleta', 'CHL - Factura Afecta',
#    nan, 'CHL - Factura Exenta', 'ARG - Gastos de viajes - Interior',
#    'ARG - Gastos de automóviles', 'CHL - Boleta de Honorario',
#    'ECU - Documentos internos', 'BRA - Reembolsos de despesas',
#    'ESP - Gastos de viaje', 'PER - Gastos de viaje',
#    'ECU - Reembolsos de gastos / Gastos de viaje']
# Keep only the records whose document type is one of the CHL entries.
filter_list = [
    'CHL - Documento Exento',
    'CHL - Boleta',
    'CHL - Factura Afecta',
    'CHL - Factura Exenta',
    'CHL - Boleta de Honorario',
]
df = df.loc[df['Tipo Documento'].isin(filter_list)]
Monto total
# Amount statistics per month: describe() output plus kurtosis and total.
# The groupby is built once and reused for all three aggregations.
amount_groups = df.groupby(['Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_groups.describe()
# Each group is a Series, so use the Series kurtosis and assign the result
# directly (index-aligned) instead of wrapping it in a one-column frame.
stats['kurt'] = amount_groups.apply(pd.Series.kurt)
# The previous call passed the column name as sum()'s first positional
# argument (numeric_only); a plain sum() is what is wanted.
stats['sum'] = amount_groups.sum()
stats = stats.reset_index()
# Display only: the renamed frame is not stored, because the plots below
# still refer to the original column name.
stats.rename(columns={'Fecha_de_la_transacción_year_month': 'month'})
| month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-01 | 1556.000000000 | 37911.463701799 | 123997.004852571 | 0.000000000 | 4790.000000000 | 13800.000000000 | 31552.750000000 | 2176000.000000000 | 138.933863060 | 58990237.520000003 |
| 1 | 2022-02 | 1457.000000000 | 32563.518894990 | 96492.883949955 | 0.000000000 | 5000.000000000 | 13600.000000000 | 31535.000000000 | 2490722.000000000 | 323.393381191 | 47445047.030000001 |
| 2 | 2022-03 | 1943.000000000 | 53545.485028307 | 362703.909635602 | 0.000000000 | 5700.000000000 | 15000.000000000 | 36100.000000000 | 9796093.000000000 | 544.095252310 | 104038877.409999996 |
| 3 | 2022-04 | 1774.000000000 | 46222.957220970 | 155646.749259334 | 0.000000000 | 5692.500000000 | 13994.000000000 | 37190.000000000 | 2766400.000000000 | 138.001916199 | 81999526.109999999 |
| 4 | 2022-05 | 1874.000000000 | 36104.556494130 | 95593.649228895 | 10.300000000 | 5950.000000000 | 14000.000000000 | 35000.000000000 | 1686587.000000000 | 128.799146126 | 67659938.870000005 |
| 5 | 2022-06 | 1826.000000000 | 46413.754874042 | 195348.257432519 | 0.000000000 | 5000.000000000 | 12875.000000000 | 34641.750000000 | 4767229.000000000 | 268.674871754 | 84751516.400000006 |
| 6 | 2022-07 | 1749.000000000 | 36038.207547170 | 140978.797285946 | 0.000000000 | 5000.000000000 | 13500.000000000 | 35500.000000000 | 4331814.000000000 | 539.469134240 | 63030825.000000000 |
| 7 | 2022-08 | 2083.000000000 | 37860.522376380 | 138523.808540887 | 100.000000000 | 5135.000000000 | 14350.000000000 | 35990.000000000 | 3349463.000000000 | 286.141865585 | 78863468.109999999 |
| 8 | 2022-09 | 1922.000000000 | 39423.673772112 | 158905.484509575 | 130.000000000 | 5000.000000000 | 13500.000000000 | 30237.500000000 | 3500000.000000000 | 250.296476936 | 75772300.989999995 |
| 9 | 2022-10 | 1751.000000000 | 69343.237190177 | 317732.880833097 | 0.000000000 | 6500.000000000 | 18000.000000000 | 43675.000000000 | 6116040.000000000 | 199.118563174 | 121420008.319999993 |
# Total amount per month
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='sum'))
    # 'sum' is already aggregated per month, so the bars take the values
    # as-is (stat='identity') instead of going through stat_sum.
    + geom_bar(stat='identity', fill='#0C475B')
    + coord_flip()
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + theme_bw()
    + theme(legend_position="none")
)
<ggplot: (139753355237)>
# Total amount per month, coloured by the number of records
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='sum', fill='count'))
    # Values are pre-aggregated: stat='identity' draws them directly and,
    # unlike stat='sum', adds no size legend that would need hiding.
    + geom_bar(stat='identity')
    + coord_flip()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + theme_bw()
    + labs(fill="N° Registros")
)
<ggplot: (139753708073)>
# Number of records per month
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count'))
    # 'count' is already aggregated per month, so draw it as-is
    + geom_bar(stat='identity', fill='#0C475B')
    + coord_flip()
    + xlab('Mes')
    + ylab('N° Registros')
    + theme_bw()
    + theme(legend_position="none")
)
<ggplot: (139753995457)>
# Number of records per month, coloured by the total amount
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count', fill='sum'))
    # Values are pre-aggregated: stat='identity' draws them directly and,
    # unlike stat='sum', adds no size legend that would need hiding.
    + geom_bar(stat='identity')
    + coord_flip()
    + xlab('Mes')
    + ylab('N° registros')
    + theme_bw()
    + labs(fill="Monto (CLP)")
)
<ggplot: (139754053218)>
# Record volume and amount per month (author's note: not easy to read)
(
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_point(aes(size='count'))
    + labs(x='N° registros', y='Mes')
    + theme_bw()
    + labs(fill="Monto (CLP)", size='N° registros')
)
<ggplot: (139855150525)>
# Box plot of the amounts for each month
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="lightskyblue")
    + labs(x='Mes', y='Monto(CLP)')
    + coord_flip()
    + theme_bw()
)
<ggplot: (139855150858)>
# Violin plot of the amounts for each month
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="lightskyblue")
    + labs(x='Mes', y='Monto(CLP)')
    + coord_flip()
    + theme_bw()
)
<ggplot: (172681670888)>
# Histogram of the amount, one panel per month with free axes
(
    ggplot(df, aes(x='Cantidad '))
    + geom_histogram(bins=30, fill='#0C475B')
    + facet_wrap('Fecha_de_la_transacción_year_month', ncol=2, scales='free')
    + labs(x='Monto (CLP)', y='N° registros')
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=1,
        panel_spacing_x=0.5,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=12),
        strip_margin=-0.2,
        strip_background=element_blank(),
        axis_text_x=element_text(rotation=50, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (172679896698)>
# Median vs. mean amount per month.
# Reshape to long form: one row per (month, statistic) so the two
# statistics can be drawn as dodged bars.
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'mean', '50%']].rename(
        columns={'mean': 'Promedio', '50%': 'Mediana'}
    ),
    id_vars=['Fecha_de_la_transacción_year_month'],
    value_vars=['Promedio', 'Mediana'],
)
(
    # The x aesthetic now names the column exactly; the earlier trailing
    # space only worked because the string was evaluated as an expression.
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'))
    + geom_bar(stat='identity', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
    )
)
<ggplot: (139743874799)>
# Daily record-volume statistics per month. In the resulting table 'count'
# is the number of days with records and '50%' is the median number of
# records per day.
# NOTE(review): the grouping key is the raw transaction timestamp; this is
# "per day" only if those values carry no time-of-day component — confirm.
records_per_date = df.groupby('Fecha de la transacción ')['Fecha de la transacción '].count()
count_by_day = records_per_date.rename("count").reset_index()
count_by_day['Fecha_de_la_transacción_year_month'] = (
    count_by_day['Fecha de la transacción '].dt.strftime('%Y-%m')
)
stats = (
    count_by_day
    .groupby('Fecha_de_la_transacción_year_month')['count']
    .describe()
    .reset_index()
)
stats
| Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-01 | 30.000000000 | 51.866666667 | 29.088134011 | 2.000000000 | 18.000000000 | 62.000000000 | 71.750000000 | 97.000000000 |
| 1 | 2022-02 | 28.000000000 | 52.035714286 | 29.236560246 | 3.000000000 | 20.750000000 | 59.500000000 | 73.250000000 | 106.000000000 |
| 2 | 2022-03 | 31.000000000 | 62.677419355 | 31.368122988 | 5.000000000 | 46.000000000 | 66.000000000 | 84.500000000 | 115.000000000 |
| 3 | 2022-04 | 30.000000000 | 59.133333333 | 32.549943941 | 4.000000000 | 21.000000000 | 70.000000000 | 87.250000000 | 108.000000000 |
| 4 | 2022-05 | 31.000000000 | 60.451612903 | 33.617295856 | 4.000000000 | 23.500000000 | 69.000000000 | 86.500000000 | 117.000000000 |
| 5 | 2022-06 | 30.000000000 | 60.866666667 | 35.830764409 | 3.000000000 | 24.500000000 | 77.000000000 | 86.750000000 | 123.000000000 |
| 6 | 2022-07 | 31.000000000 | 56.419354839 | 32.288051653 | 3.000000000 | 21.000000000 | 69.000000000 | 80.000000000 | 101.000000000 |
| 7 | 2022-08 | 31.000000000 | 67.193548387 | 37.094850815 | 3.000000000 | 24.500000000 | 80.000000000 | 91.500000000 | 153.000000000 |
| 8 | 2022-09 | 30.000000000 | 64.066666667 | 39.647527499 | 2.000000000 | 18.750000000 | 78.500000000 | 89.500000000 | 121.000000000 |
| 9 | 2022-10 | 31.000000000 | 56.483870968 | 38.021810379 | 3.000000000 | 19.000000000 | 64.000000000 | 85.500000000 | 125.000000000 |
# Distribution of the daily record volume for each month
(
    # y names the 'count' column exactly (the earlier 'count ' had a stray
    # trailing space and only resolved through expression evaluation).
    ggplot(count_by_day, aes(x='Fecha_de_la_transacción_year_month', y='count'))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Registros')
    + theme_bw()
    + coord_flip()
)
<ggplot: (139744309318)>
Monto por Tipo de documento
# Amount statistics per document type and month (first 10 rows shown).
# The groupby is built once and reused for all three aggregations.
amount_groups = df.groupby(['Tipo Documento', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_groups.describe()
# Each group is a Series, so use the Series kurtosis and assign the result
# directly (index-aligned) instead of wrapping it in a one-column frame.
stats['kurt'] = amount_groups.apply(pd.Series.kurt)
# The previous call passed the column name as sum()'s first positional
# argument (numeric_only); a plain sum() is what is wanted.
stats['sum'] = amount_groups.sum()
stats = stats.reset_index()
stats.head(10)
| Tipo Documento | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | CHL - Boleta | 2022-03 | 1.000000000 | 18000.000000000 | NaN | 18000.000000000 | 18000.000000000 | 18000.000000000 | 18000.000000000 | 18000.000000000 | NaN | 18000.000000000 |
| 1 | CHL - Boleta | 2022-04 | 1.000000000 | 14850.000000000 | NaN | 14850.000000000 | 14850.000000000 | 14850.000000000 | 14850.000000000 | 14850.000000000 | NaN | 14850.000000000 |
| 2 | CHL - Boleta | 2022-05 | 1.000000000 | 13265.000000000 | NaN | 13265.000000000 | 13265.000000000 | 13265.000000000 | 13265.000000000 | 13265.000000000 | NaN | 13265.000000000 |
| 3 | CHL - Boleta | 2022-06 | 5.000000000 | 8990.000000000 | 11740.017035763 | 1050.000000000 | 4700.000000000 | 4700.000000000 | 4700.000000000 | 29800.000000000 | 4.668625163 | 44950.000000000 |
| 4 | CHL - Boleta | 2022-07 | 20.000000000 | 36904.250000000 | 69628.901199880 | 2000.000000000 | 4700.000000000 | 5150.000000000 | 15000.000000000 | 245800.000000000 | 4.220933689 | 738085.000000000 |
| 5 | CHL - Boleta | 2022-08 | 29.000000000 | 14200.241379310 | 13512.394764108 | 600.000000000 | 4700.000000000 | 8920.000000000 | 20990.000000000 | 45300.000000000 | 0.095340057 | 411807.000000000 |
| 6 | CHL - Boleta | 2022-09 | 137.000000000 | 35600.818175182 | 72131.080002038 | 400.000000000 | 7000.000000000 | 14700.000000000 | 35000.000000000 | 693165.000000000 | 52.827116790 | 4877312.090000000 |
| 7 | CHL - Boleta | 2022-10 | 320.000000000 | 29155.312500000 | 31673.866655616 | 270.000000000 | 7450.000000000 | 19326.000000000 | 40000.000000000 | 220000.000000000 | 7.729252457 | 9329700.000000000 |
| 8 | CHL - Boleta de Honorario | 2022-09 | 6.000000000 | 53375.000000000 | 2547.106201162 | 50000.000000000 | 51700.000000000 | 53500.000000000 | 55000.000000000 | 56650.000000000 | -1.722597415 | 320250.000000000 |
| 9 | CHL - Boleta de Honorario | 2022-10 | 10.000000000 | 40843.700000000 | 36335.202049956 | 3000.000000000 | 9750.000000000 | 35000.000000000 | 55000.000000000 | 109687.000000000 | -0.272305811 | 408437.000000000 |
# Total amount per month, one panel per document type
(
    # x names the column exactly (no trailing space)
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month'))
    # 'sum' is pre-aggregated, so draw the values as-is
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139752344631)>
# Total amount per month per document type, coloured by number of records
(
    # x names the column exactly (no trailing space)
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month', fill='count'))
    # Pre-aggregated values: stat='identity' draws them directly and adds
    # no size legend that would need hiding.
    + geom_bar(stat='identity')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill="N° Registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139750367518)>
# Number of records per month, one panel per document type
(
    # x names the column exactly (no trailing space)
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month'))
    # 'count' is pre-aggregated, so draw the values as-is
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('Mes')
    # y shows the record count, so label it as such (it was 'Monto(CLP)')
    + ylab('N° Registros')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139750221095)>
# Number of records per month per document type, coloured by total amount
(
    # x names the column exactly (no trailing space)
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month', fill='sum'))
    # Pre-aggregated values: stat='identity' draws them directly and adds
    # no size legend that would need hiding.
    + geom_bar(stat='identity')
    + xlab('Mes')
    # y shows the record count, so label it as such (it was 'Monto(CLP)')
    + ylab('N° Registros')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill='Monto (CLP)')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139745949988)>
# Record volume and amount per month, one panel per document type
(
    # x names the 'count' column exactly (the earlier 'count ' had a stray
    # trailing space and only resolved through expression evaluation).
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139862025045)>
# Box plot of the amount per month, one panel per document type
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + labs(x='Mes', y='Monto(CLP)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139865036731)>
# Violin plot of the amount per month, one panel per document type
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + labs(x='Mes', y='Monto(CLP)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139865818058)>
# Median vs. mean amount per month, one panel per document type.
# Reshape to long form: one row per (month, document type, statistic).
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'Tipo Documento', 'mean', '50%']].rename(
        columns={'mean': 'Promedio', '50%': 'Mediana'}
    ),
    id_vars=['Fecha_de_la_transacción_year_month', 'Tipo Documento'],
    value_vars=['Promedio', 'Mediana'],
)
(
    # x names the column exactly (no trailing space)
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'))
    # Values are pre-computed, so draw them as-is; this matches the
    # un-faceted mean-vs-median chart, which already uses stat='identity'.
    + geom_bar(stat='identity', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139866235721)>
Monto por Fondo fijo / Anticipo / Reembolso
# Amount statistics per 'Fondo fijo' value and month (first 10 rows shown).
# The groupby is built once and reused for all three aggregations.
amount_groups = df.groupby(['Fondo fijo', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = amount_groups.describe()
# Each group is a Series, so use the Series kurtosis and assign the result
# directly (index-aligned) instead of wrapping it in a one-column frame.
stats['kurt'] = amount_groups.apply(pd.Series.kurt)
# The previous call passed the column name as sum()'s first positional
# argument (numeric_only); a plain sum() is what is wanted.
stats['sum'] = amount_groups.sum()
stats = stats.reset_index()
stats.head(10)
| Fondo fijo | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ANTICIPO (VxR) | 2022-01 | 57.000000000 | 327960.140350877 | 489657.449180162 | 936.000000000 | 34400.000000000 | 91105.000000000 | 500000.000000000 | 2176000.000000000 | 4.636796217 | 18693728.000000000 |
| 1 | ANTICIPO (VxR) | 2022-02 | 49.000000000 | 170791.530612245 | 294723.434672526 | 1100.000000000 | 12450.000000000 | 21277.000000000 | 175104.000000000 | 1190000.000000000 | 4.444505249 | 8368785.000000000 |
| 2 | ANTICIPO (VxR) | 2022-03 | 166.000000000 | 330403.740963855 | 1193743.781590327 | 800.000000000 | 8775.000000000 | 30167.500000000 | 135596.750000000 | 9796093.000000000 | 46.991017388 | 54847021.000000000 |
| 3 | ANTICIPO (VxR) | 2022-04 | 131.000000000 | 233181.267175573 | 468314.384100152 | 0.000000000 | 19950.000000000 | 53700.000000000 | 198589.500000000 | 2766400.000000000 | 12.488317015 | 30546746.000000000 |
| 4 | ANTICIPO (VxR) | 2022-05 | 97.000000000 | 164735.371134021 | 295118.934899535 | 2600.000000000 | 19950.000000000 | 51250.000000000 | 126660.000000000 | 1686587.000000000 | 13.031007867 | 15979331.000000000 |
| 5 | ANTICIPO (VxR) | 2022-06 | 124.000000000 | 273423.572580645 | 651614.918934341 | 759.000000000 | 19712.250000000 | 56056.000000000 | 170098.750000000 | 4767229.000000000 | 23.137718164 | 33904523.000000000 |
| 6 | ANTICIPO (VxR) | 2022-07 | 58.000000000 | 264959.241379310 | 689402.328472045 | 550.000000000 | 3887.500000000 | 12479.000000000 | 155537.500000000 | 4331814.000000000 | 22.048295861 | 15367636.000000000 |
| 7 | ANTICIPO (VxR) | 2022-08 | 92.000000000 | 218918.565217391 | 550378.407254765 | 500.000000000 | 9800.000000000 | 38795.000000000 | 108420.000000000 | 3349463.000000000 | 17.314978794 | 20140508.000000000 |
| 8 | ANTICIPO (VxR) | 2022-09 | 38.000000000 | 538696.289473684 | 905344.869130346 | 8400.000000000 | 18637.500000000 | 129227.500000000 | 556115.750000000 | 3500000.000000000 | 3.466450326 | 20470459.000000000 |
| 9 | ANTICIPO (VxR) | 2022-10 | 108.000000000 | 461148.120370370 | 935080.230456866 | 7100.000000000 | 40337.500000000 | 115110.000000000 | 409002.000000000 | 5999544.000000000 | 16.489103411 | 49803997.000000000 |
# Total amount per month, one panel per 'Fondo fijo' value
(
    # x names the column exactly (no trailing space)
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month'))
    # 'sum' is pre-aggregated, so draw the values as-is
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139868252344)>
# Total amount per month per 'Fondo fijo' value, coloured by number of records
(
    # x names the column exactly (no trailing space)
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month', fill='count'))
    # Pre-aggregated values: stat='identity' draws them directly and adds
    # no size legend that would need hiding.
    + geom_bar(stat='identity')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139868533682)>
# Number of records per month, one panel per 'Fondo fijo' value
(
    # x names the column exactly (no trailing space)
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month'))
    # 'count' is pre-aggregated, so draw the values as-is
    + geom_bar(stat='identity', fill='#0C475B')
    + xlab('Mes')
    # y shows the record count, so label it as such (it was 'Monto')
    + ylab('N° Registros')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139869073396)>
# Number of records per month per 'Fondo fijo' value, coloured by total amount
(
    # x names the column exactly (no trailing space)
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month', fill='sum'))
    # Pre-aggregated values: stat='identity' draws them directly and adds
    # no size legend that would need hiding.
    + geom_bar(stat='identity')
    + xlab('Mes')
    # y shows the record count, so label it as such (it was 'Monto')
    + ylab('N° Registros')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139869217651)>
# Record count and amount per month and "Fondo fijo" as a point plot:
# point size encodes the record count, fill encodes the total amount.
# 'count' is referenced by its exact column name (no trailing space).
(
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139869377317)>
# Box plot of the amount per month, one panel per "Fondo fijo".
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139869522446)>
# Violin plot of the amount per month, one panel per "Fondo fijo".
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139869887975)>
# Median vs mean per "Fondo fijo".
# Reshape the mean/median columns to long format so both can be drawn as
# dodged bars distinguished by the 'variable' column.
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'Fondo fijo', 'mean', '50%']].rename(
        columns={'mean': 'Promedio', '50%': 'Mediana'}
    ),
    id_vars=['Fecha_de_la_transacción_year_month', 'Fondo fijo'],
    value_vars=['Promedio', 'Mediana'],
)
# The x aesthetic uses the exact column name (no stray trailing space).
(
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=10),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139871213686)>
Monto por Sucursal
# Monthly descriptive statistics of the amount per branch (first 5 rows shown).
stats = df.groupby(['Sucursal ', 'Fecha_de_la_transacción_year_month'])['Cantidad '].describe()
# Kurtosis per group. Each group is a Series, so the Series method is used and
# the resulting Series is assigned directly (it aligns on the group index).
stats['kurt'] = df.groupby(['Sucursal ', 'Fecha_de_la_transacción_year_month'])['Cantidad '].apply(pd.Series.kurt)
# Total amount per group. The column is already selected, so sum() takes no
# argument; the first positional parameter of GroupBy.sum is `numeric_only`,
# not a column name.
stats['sum'] = df.groupby(['Sucursal ', 'Fecha_de_la_transacción_year_month'])['Cantidad '].sum()
# Turn the group keys back into regular columns for plotting.
stats = stats.reset_index()
stats.head(5)
| Sucursal | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-01 | 29.000000000 | 14757.379310345 | 16716.822441169 | 730.000000000 | 2200.000000000 | 7000.000000000 | 20990.000000000 | 59080.000000000 | 0.568508935 | 427964.000000000 |
| 1 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-02 | 19.000000000 | 9596.300000000 | 10793.736293126 | 650.000000000 | 2200.000000000 | 8370.000000000 | 13700.000000000 | 46954.700000000 | 7.799474485 | 182329.700000000 |
| 2 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-03 | 30.000000000 | 16061.966666667 | 13189.282363901 | 1300.000000000 | 5243.250000000 | 14700.000000000 | 23312.500000000 | 48410.000000000 | -0.165472247 | 481859.000000000 |
| 3 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-04 | 35.000000000 | 20914.542857143 | 18507.397006639 | 750.000000000 | 4750.000000000 | 15000.000000000 | 31480.500000000 | 70000.000000000 | 0.041396137 | 732009.000000000 |
| 4 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-05 | 43.000000000 | 23956.000000000 | 32423.697072622 | 700.000000000 | 4500.000000000 | 14000.000000000 | 33465.000000000 | 188756.000000000 | 15.605371463 | 1030108.000000000 |
# Contribution of each branch to the total monthly amount (stacked bars).
# The x aesthetic uses the exact column name (no stray trailing space);
# 'Sucursal ' keeps its trailing space because that is the real column name.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month', fill='Sucursal '))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.7,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
    )
)
<ggplot: (139744010781)>
# Total amount per month, one panel per branch.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139863311216)>
# Total amount per month per branch, coloured by number of records.
# The x aesthetic uses the exact column name (no stray trailing space).
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month', fill='count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139856358773)>
# Number of records per month, one panel per branch.
# The x aesthetic uses the exact column name (no stray trailing space).
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139741896464)>
# Number of records per month per branch, coloured by total amount.
# The x aesthetic uses the exact column name (no stray trailing space).
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139739964486)>
# Record count and amount per month, one panel per branch (the panels are
# faceted by 'Sucursal ', not by "Fondo fijo"): point size encodes the record
# count, fill encodes the total amount.
# 'count' is referenced by its exact column name (no trailing space).
(
    ggplot(stats, aes(x='count', y='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139743571722)>
# Record count (point size) and total amount (fill) for every
# branch/month combination in a single panel.
(
    ggplot(stats, aes(x='Sucursal ', y='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_point(aes(size='count'))
    + xlab('Sucursal')
    + ylab('Mes')
    + theme_bw()
    + coord_flip()
    + theme(axis_text_x=element_text(rotation=50, hjust=1))
    + labs(fill="Monto (CLP)", size='N° registros')
)
<ggplot: (139753868916)>
# Box plot of the amount per month, one panel per branch.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139859481099)>
# Violin plot of the amount per month, one panel per branch
# (the panels are faceted by 'Sucursal ').
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139871177566)>
# Median vs mean per branch.
# Reshape the mean/median columns to long format so both can be drawn as
# dodged bars distinguished by the 'variable' column.
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'Sucursal ', 'mean', '50%']].rename(
        columns={'mean': 'Promedio', '50%': 'Mediana'}
    ),
    id_vars=['Fecha_de_la_transacción_year_month', 'Sucursal '],
    value_vars=['Promedio', 'Mediana'],
)
# The x aesthetic uses the exact column name (no stray trailing space).
(
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139869081919)>
Monto por proyecto
# Monthly descriptive statistics of the amount per project (first 5 rows shown).
stats = df.groupby(['Proyecto ', 'Fecha_de_la_transacción_year_month'])['Cantidad '].describe()
# Kurtosis per group. Each group is a Series, so the Series method is used and
# the resulting Series is assigned directly (it aligns on the group index).
stats['kurt'] = df.groupby(['Proyecto ', 'Fecha_de_la_transacción_year_month'])['Cantidad '].apply(pd.Series.kurt)
# Total amount per group. The column is already selected, so sum() takes no
# argument; the first positional parameter of GroupBy.sum is `numeric_only`,
# not a column name.
stats['sum'] = df.groupby(['Proyecto ', 'Fecha_de_la_transacción_year_month'])['Cantidad '].sum()
# Turn the group keys back into regular columns for plotting.
stats = stats.reset_index()
stats.head(5)
| Proyecto | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 100 - Santiago (Quilicura) | 2022-01 | 45.000000000 | 120545.711111111 | 331877.066313624 | 745.000000000 | 1200.000000000 | 11771.000000000 | 50330.000000000 | 1598711.000000000 | 14.946562088 | 5424557.000000000 |
| 1 | 100 - Santiago (Quilicura) | 2022-02 | 22.000000000 | 55918.818181818 | 93097.727402899 | 745.000000000 | 7475.000000000 | 15500.000000000 | 57876.500000000 | 403350.000000000 | 9.489058858 | 1230214.000000000 |
| 2 | 100 - Santiago (Quilicura) | 2022-03 | 39.000000000 | 258202.574358974 | 1063163.105397802 | 1340.000000000 | 9305.000000000 | 31238.000000000 | 117957.700000000 | 6678684.000000000 | 37.743016111 | 10069900.400000000 |
| 3 | 100 - Santiago (Quilicura) | 2022-04 | 37.000000000 | 98248.675675676 | 269866.010419065 | 600.000000000 | 8104.000000000 | 22750.000000000 | 56450.000000000 | 1600819.000000000 | 28.405000347 | 3635201.000000000 |
| 4 | 100 - Santiago (Quilicura) | 2022-05 | 42.000000000 | 55868.619047619 | 73025.105750027 | 800.000000000 | 8578.000000000 | 20925.000000000 | 63827.500000000 | 332700.000000000 | 4.113420906 | 2346482.000000000 |
# Top projects by total amount for each month (first 5 rows shown).
# Steps of the chain below, in order:
#   1. group by month and sort each month's rows by 'sum', descending;
#   2. replace the index with a sequential 'Index' column (0..len(stats)-1);
#   3. group by month again and keep the first 10 rows of each group.
stats_top = stats.groupby('Fecha_de_la_transacción_year_month').apply(lambda x: x.sort_values(['sum'],ascending = False)).assign(Index=range(len(stats))).set_index('Index').groupby('Fecha_de_la_transacción_year_month').head(10)
stats_top.head(5)
| Proyecto | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Index | ||||||||||||
| 0 | 100 - Santiago (Quilicura) | 2022-01 | 45.000000000 | 120545.711111111 | 331877.066313624 | 745.000000000 | 1200.000000000 | 11771.000000000 | 50330.000000000 | 1598711.000000000 | 14.946562088 | 5424557.000000000 |
| 1 | 570 -Temuco Municipal | 2022-01 | 23.000000000 | 235219.652173913 | 568115.863922029 | 2000.000000000 | 11925.000000000 | 24000.000000000 | 55000.000000000 | 2176000.000000000 | 7.859434367 | 5410052.000000000 |
| 2 | 550 - Puerto Montt | 2022-01 | 125.000000000 | 39680.248000000 | 127222.840524187 | 200.000000000 | 5400.000000000 | 15000.000000000 | 34000.000000000 | 981750.000000000 | 39.785697239 | 4960031.000000000 |
| 3 | 300 - Vina del Mar | 2022-01 | 101.000000000 | 23651.029702970 | 26324.282865999 | 800.000000000 | 7600.000000000 | 16898.000000000 | 34185.000000000 | 223600.000000000 | 32.640682194 | 2388754.000000000 |
| 4 | 402 - GERENCIA | 2022-01 | 10.000000000 | 235344.800000000 | 537675.869926565 | 11000.000000000 | 23768.500000000 | 50317.000000000 | 51970.000000000 | 1742650.000000000 | 9.175458189 | 2353448.000000000 |
# Top projects by total amount, one panel per month.
# Each bar gets a "<project> - <month>" label; the labels become an ordered
# set of categories (reversed row order) that fixes the bar order.
etiquetas = stats_top['Proyecto '] + ' - ' + stats_top['Fecha_de_la_transacción_year_month'].astype('string')
categoria_list = etiquetas.tolist()[::-1]
categoria_cat = pd.Categorical(etiquetas, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
(
    ggplot(stats_top_aux, aes(y='sum', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Proyecto')
    + ylab('Monto(CLP)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139750221419)>
# Count how many monthly tops each project appears in, most frequent first.
apariciones = stats_top.groupby('Proyecto ')['Proyecto '].count()
categoria_count = apariciones.sort_values(ascending=False)
# Display as a two-column table: project and number of months in the top.
categoria_count.to_frame(name='Meses en top').reset_index()
| Proyecto | Meses en top | |
|---|---|---|
| 0 | PLACILLA -Transporte Ruta | 10 |
| 1 | 100 - Santiago (Quilicura) | 9 |
| 2 | 570 -Temuco Municipal | 9 |
| 3 | 550 - Puerto Montt | 9 |
| 4 | 500 - Concepcion | 7 |
| 5 | MARITIMO - Transporte Ruta | 6 |
| 6 | INDUSTRIAL - Transporte Ruta | 6 |
| 7 | 300 - Vina del Mar | 6 |
| 8 | 109 - Arauco | 5 |
| 9 | 120 - Las Condes | 5 |
| 10 | SANTIAGO -Transporte Ruta | 4 |
| 11 | CONCEPCION -Transporte Ruta | 4 |
| 12 | TEMUCO -Transporte Ruta | 3 |
| 13 | 170 - Penalolen Recoleccion | 3 |
| 14 | 402 - GERENCIA | 2 |
| 15 | 112 - Arauco Mapa | 2 |
| 16 | 400 - Rancagua Industrial | 2 |
| 17 | 106 - San Felipe | 1 |
| 18 | 380-ESTRUCTURA (CASA MATRIZ) | 1 |
| 19 | 180- La Reina Recoleccion | 1 |
| 20 | 160 - Maipu Zona B Recoleccion | 1 |
| 21 | 150 - Maipu Zona A Recoleccion | 1 |
| 22 | 130 - Rancagua Recoleccion | 1 |
| 23 | PUERTO MONTT -Transporte Ruta | 1 |
| 24 | SANTIAGO -Tratamiento | 1 |
# Projects that recur in the monthly top according to a threshold
# (default: present in the top for at least half of the months).
# Alternative thresholds:
#   n_month = 2  -> in the top for at least 2 months
#   n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())  -> in the top every month
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2
# Projects meeting the threshold, in descending order of appearances.
top_proyectos = categoria_count[categoria_count >= n_month].index.to_list()
# Work on an explicit copy: assigning a column on a filtered view of `df`
# triggers pandas' SettingWithCopyWarning and may not behave as intended.
df_top_categorias = df[df['Proyecto '].isin(top_proyectos)].copy()
# Categorical with the ranked order so the facets follow that order.
df_top_categorias['Proyecto '] = pd.Categorical(df_top_categorias['Proyecto '], categories=top_proyectos)
# The x aesthetic uses the exact column name (no stray trailing space).
(
    ggplot(df_top_categorias, aes(y='Cantidad ', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Proyecto ', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size=False)
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139753653843)>
# Monthly descriptive statistics of the amount for the recurring top projects
# (first 5 rows shown).
stats = df_top_categorias.groupby(['Proyecto ', 'Fecha_de_la_transacción_year_month'])['Cantidad '].describe()
# Kurtosis per group. Each group is a Series, so the Series method is used and
# the resulting Series is assigned directly (it aligns on the group index).
stats['kurt'] = df_top_categorias.groupby(['Proyecto ', 'Fecha_de_la_transacción_year_month'])['Cantidad '].apply(pd.Series.kurt)
# Total amount per group. The column is already selected, so sum() takes no
# argument; the first positional parameter of GroupBy.sum is `numeric_only`,
# not a column name.
stats['sum'] = df_top_categorias.groupby(['Proyecto ', 'Fecha_de_la_transacción_year_month'])['Cantidad '].sum()
# Turn the group keys back into regular columns for plotting.
stats = stats.reset_index()
stats.head(5)
| Proyecto | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | PLACILLA -Transporte Ruta | 2022-01 | 38.000000000 | 53731.894736842 | 77543.030164701 | 1400.000000000 | 9550.000000000 | 16855.000000000 | 69710.000000000 | 354724.000000000 | 5.311566664 | 2041812.000000000 |
| 1 | PLACILLA -Transporte Ruta | 2022-02 | 34.000000000 | 84421.764705882 | 80203.380774756 | 1000.000000000 | 42137.500000000 | 57500.000000000 | 91852.500000000 | 338300.000000000 | 2.886146866 | 2870340.000000000 |
| 2 | PLACILLA -Transporte Ruta | 2022-03 | 54.000000000 | 87056.000000000 | 130653.440522397 | 4080.000000000 | 16300.000000000 | 39500.000000000 | 106626.750000000 | 781900.000000000 | 16.363198385 | 4701024.000000000 |
| 3 | PLACILLA -Transporte Ruta | 2022-04 | 34.000000000 | 89087.117647059 | 65409.739816286 | 6000.000000000 | 47747.500000000 | 77575.000000000 | 125432.500000000 | 324667.000000000 | 4.275231995 | 3028962.000000000 |
| 4 | PLACILLA -Transporte Ruta | 2022-05 | 45.000000000 | 109292.000000000 | 238023.502942175 | 5500.000000000 | 35000.000000000 | 60000.000000000 | 98000.000000000 | 1619288.000000000 | 38.993021910 | 4918140.000000000 |
# Total amount per month, one panel per recurring top project.
# Bar height is the summed amount, so the y axis is labelled 'Monto'
# (it previously read 'N° Registros'); x uses the exact column name.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Proyecto ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139750795255)>
# Total amount per month per recurring top project, coloured by record count.
# The x aesthetic uses the exact column name (no stray trailing space).
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month', fill='count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Proyecto ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139750513121)>
# Number of records per month, one panel per recurring top project.
# The x aesthetic uses the exact column name (no stray trailing space).
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Proyecto ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139742609060)>
# Number of records per month per recurring top project, coloured by amount.
# Bar height is the record count and the fill is the summed amount, so the
# y label and the fill legend are labelled to match (they were swapped).
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month', fill='sum'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Proyecto ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139750507124)>
# Box plot of the amount per month, one panel per recurring top project.
(
    ggplot(df_top_categorias, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Proyecto ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139860941248)>
# Violin plot of the amount per month, one panel per project.
(
    ggplot(df_top_categorias, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Proyecto ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139743148888)>
# Median vs. mean amount per project and month.
# The two statistics are reshaped to long format ('variable' / 'value') so they
# can be drawn side by side as dodged bars.
stats_melt = (
    stats[['Fecha_de_la_transacción_year_month', 'Proyecto ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
    .melt(
        id_vars=['Fecha_de_la_transacción_year_month', 'Proyecto '],
        value_vars=['Promedio', 'Mediana'],
    )
)
(
    ggplot(stats_melt, aes(y='value', x = 'Fecha_de_la_transacción_year_month ', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Proyecto ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139862015328)>
Monto por Categoría
# Descriptive statistics of the amount ('Cantidad ') per category and month
# (first 5 rows shown).
grouped_amount = df.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = grouped_amount.describe()
# Each group is a Series, so the Series method is the one to apply; the result
# is already a Series indexed like `stats`, so no to_frame() is needed.
stats['kurt'] = grouped_amount.apply(pd.Series.kurt)
# The column is already selected above; sum() takes no column-name argument
# (its first positional parameter is numeric_only).
stats['sum'] = grouped_amount.sum()
stats = stats.reset_index()
stats.head(5)
| Categoría | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ADITIVO BLUE MAX | 2022-03 | 1.000000000 | 46500.000000000 | NaN | 46500.000000000 | 46500.000000000 | 46500.000000000 | 46500.000000000 | 46500.000000000 | NaN | 46500.000000000 |
| 1 | ADITIVO BLUE MAX | 2022-04 | 2.000000000 | 24800.000000000 | 20930.360723122 | 10000.000000000 | 17400.000000000 | 24800.000000000 | 32200.000000000 | 39600.000000000 | NaN | 49600.000000000 |
| 2 | ADITIVO BLUE MAX | 2022-06 | 1.000000000 | 5010.000000000 | NaN | 5010.000000000 | 5010.000000000 | 5010.000000000 | 5010.000000000 | 5010.000000000 | NaN | 5010.000000000 |
| 3 | ADITIVO BLUE MAX | 2022-10 | 1.000000000 | 10000.000000000 | NaN | 10000.000000000 | 10000.000000000 | 10000.000000000 | 10000.000000000 | 10000.000000000 | NaN | 10000.000000000 |
| 4 | Abogados | 2022-05 | 1.000000000 | 6000.000000000 | NaN | 6000.000000000 | 6000.000000000 | 6000.000000000 | 6000.000000000 | 6000.000000000 | NaN | 6000.000000000 |
# Top 10 categories by total amount for each month (first 5 rows shown).
stats_top = (
    stats.groupby('Fecha_de_la_transacción_year_month')
    # Within every month, order the rows from largest to smallest total.
    .apply(lambda x: x.sort_values(['sum'], ascending=False))
    # Replace the index with a running row number named 'Index'.
    .assign(Index=range(len(stats)))
    .set_index('Index')
    # Keep the first 10 rows of each month, i.e. the 10 largest totals.
    .groupby('Fecha_de_la_transacción_year_month')
    .head(10)
)
stats_top.head(5)
| Categoría | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Index | ||||||||||||
| 0 | Peajes | 2022-01 | 201.000000000 | 50752.786069652 | 203617.453847157 | 200.000000000 | 1950.000000000 | 15000.000000000 | 42000.000000000 | 2176000.000000000 | 84.337639371 | 10201310.000000000 |
| 1 | Manutencion - Colacion | 2022-01 | 260.000000000 | 26383.076038462 | 51245.734643259 | 886.600000000 | 5075.000000000 | 10545.000000000 | 20462.250000000 | 405590.000000000 | 21.749169761 | 6859599.770000000 |
| 2 | Impto veh. - P.circulacion - Rev. Tecnica - SOAP | 2022-01 | 86.000000000 | 69221.186046512 | 123084.973561167 | 936.000000000 | 11800.000000000 | 18500.000000000 | 64500.000000000 | 617100.000000000 | 8.827482721 | 5953022.000000000 |
| 3 | Repuestos y mantto equipos - Veh. pesados | 2022-01 | 86.000000000 | 59375.802325581 | 154160.596802229 | 720.000000000 | 8810.000000000 | 18907.500000000 | 41650.000000000 | 981750.000000000 | 23.891108849 | 5106319.000000000 |
| 4 | Peajes / Estacionamiento | 2022-01 | 115.000000000 | 41536.591304348 | 108913.953434673 | 700.000000000 | 2900.000000000 | 6750.000000000 | 24925.000000000 | 749600.000000000 | 24.595371489 | 4776708.000000000 |
# Top categories by total amount, one panel per month.
# Each bar is labelled "<category> - <month>"; the reversed label list is used
# as the category order of the axis.
categoria_labels = stats_top['Categoría '] + ' - ' + stats_top['Fecha_de_la_transacción_year_month'].astype('string')
categoria_list = categoria_labels.tolist()[::-1]
categoria_cat = pd.Categorical(categoria_labels, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
(
    ggplot(stats_top_aux, aes(y='sum', x = 'categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Categoría')
    + ylab('Monto(CLP)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139753960163)>
# Number of months in which each category appears in the monthly top,
# ordered from most to least frequent.
categoria_count = (
    stats_top.groupby('Categoría ')['Categoría ']
    .count()
    .sort_values(ascending=False)
)
categoria_count.to_frame(name='Meses en top').reset_index()
| Categoría | Meses en top | |
|---|---|---|
| 0 | Combustibles - Diesel y gasolina | 10 |
| 1 | Peajes / Estacionamiento | 10 |
| 2 | Manutencion - Colacion | 10 |
| 3 | Peajes | 10 |
| 4 | Repuestos y mantto equipos - Veh. pesados | 9 |
| 5 | Impto veh. - P.circulacion - Rev. Tecnica - SOAP | 7 |
| 6 | Vehiculo priv-Uber, taxi, gastos traslados | 6 |
| 7 | Instalaciones - Mantencion | 6 |
| 8 | Mantenimiento Vehiculos | 6 |
| 9 | Otros gastos direct - Multas | 5 |
| 10 | Resto de coste indirecto - Multas | 3 |
| 11 | Alojamientos | 3 |
| 12 | Base operaciones - Arriendo | 3 |
| 13 | Impuestos- Notaria -Certif - Doc.Legal | 3 |
| 14 | Pasajes Aereos y terrestres al int (en Chile) | 2 |
| 15 | Otros - Correo / Fotocopia | 1 |
| 16 | Otros servicios de Asesorias | 1 |
| 17 | Materiales de oficina | 1 |
| 18 | Repuestos equipos y maquina | 1 |
| 19 | Gasolina | 1 |
| 20 | De instalac. Y obras (mant. instalaciones) | 1 |
| 21 | Neumaticos - Recauchajes | 1 |
# Categories that recur in the monthly top: keep those that appear in the top
# in at least half of the months.
#n_month = 2  # alternative criterion: in the top at least 2 months
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2  # at least half of the months
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())  # alternative criterion: in the top every month
top_categorias = categoria_count[categoria_count >= n_month].index.to_list()
# .copy() makes the filtered frame independent of df, so the column assignment
# below does not raise SettingWithCopyWarning.
df_top_categorias = df[df['Categoría '].isin(top_categorias)].copy()
df_top_categorias['Categoría '] = pd.Categorical(df_top_categorias['Categoría '], categories=top_categorias)
# Total amount per month and category. stat_summary with np.sum adds up every
# record; stat='sum' only counts overlapping points, so on raw records it
# stacked each distinct amount once and left repeated amounts out of the total.
(
    ggplot(df_top_categorias, aes(y='Cantidad ', x = 'Fecha_de_la_transacción_year_month '))
    + stat_summary(fun_y=np.sum, geom='bar', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Categoría ', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139868743981)>
# Descriptive statistics of the amount per category and month, restricted to
# the rows kept in df_top_categorias (first 5 rows shown).
grouped_amount = df_top_categorias.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = grouped_amount.describe()
# Each group is a Series, so the Series method is the one to apply; the result
# is already a Series indexed like `stats`, so no to_frame() is needed.
stats['kurt'] = grouped_amount.apply(pd.Series.kurt)
# The column is already selected above; sum() takes no column-name argument
# (its first positional parameter is numeric_only).
stats['sum'] = grouped_amount.sum()
stats = stats.reset_index()
stats.head(5)
| Categoría | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Combustibles - Diesel y gasolina | 2022-01 | 84.000000000 | 29576.488095238 | 42745.577354967 | 2800.000000000 | 5000.000000000 | 17819.500000000 | 37000.500000000 | 263471.000000000 | 15.079063649 | 2484425.000000000 |
| 1 | Combustibles - Diesel y gasolina | 2022-02 | 90.000000000 | 31272.700000000 | 48911.621483038 | 2000.000000000 | 5000.000000000 | 19010.000000000 | 29803.000000000 | 298737.000000000 | 16.002014184 | 2814543.000000000 |
| 2 | Combustibles - Diesel y gasolina | 2022-03 | 90.000000000 | 30672.111111111 | 62743.840831787 | 3000.000000000 | 5000.000000000 | 20000.000000000 | 30000.000000000 | 535639.000000000 | 49.657136142 | 2760490.000000000 |
| 3 | Combustibles - Diesel y gasolina | 2022-04 | 86.000000000 | 24785.267441860 | 25931.398326410 | 3000.000000000 | 9953.500000000 | 15000.000000000 | 30000.000000000 | 126035.000000000 | 5.245470202 | 2131533.000000000 |
| 4 | Combustibles - Diesel y gasolina | 2022-05 | 118.000000000 | 24245.454152542 | 27119.972963096 | 3000.000000000 | 7500.000000000 | 20000.000000000 | 27338.500000000 | 174438.000000000 | 10.761886579 | 2860963.590000000 |
# Total amount per month, one panel per category.
(
    ggplot(stats, aes(y='sum', x = 'Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto(CLP)')  # y is the summed amount, not a record count
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # presumably hides the size legend that stat='sum' adds
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139743302885)>
# Total amount per month and category; the fill colour encodes the record count.
(
    ggplot(stats, aes(y='sum', x = 'Fecha_de_la_transacción_year_month ', fill = 'count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139751074744)>
# Number of records per month, one panel per category.
(
    ggplot(stats, aes(y='count', x = 'Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139862037102)>
# Number of records per month and category; the fill colour encodes the total amount.
(
    ggplot(stats, aes(y='count', x = 'Fecha_de_la_transacción_year_month ', fill = 'sum'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')  # y is mapped to the record count, not to the amount
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # presumably hides the size legend that stat='sum' adds
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Monto(CLP)")  # fill is mapped to the summed amount
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139740101194)>
# Box plot of the amount per month, one panel per category.
(
    ggplot(df_top_categorias, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139740790148)>
# Violin plot of the amount per month, one panel per category.
(
    ggplot(df_top_categorias, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139856707065)>
# Median vs. mean amount per category and month.
# The two statistics are reshaped to long format ('variable' / 'value') so they
# can be drawn side by side as dodged bars.
stats_melt = (
    stats[['Fecha_de_la_transacción_year_month', 'Categoría ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
    .melt(
        id_vars=['Fecha_de_la_transacción_year_month', 'Categoría '],
        value_vars=['Promedio', 'Mediana'],
    )
)
(
    ggplot(stats_melt, aes(y='value', x = 'Fecha_de_la_transacción_year_month ', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Monto')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139862920072)>
Monto por Proveedor
# Descriptive statistics of the amount ('Cantidad ') per provider and month
# (first 5 rows shown).
grouped_amount = df.groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = grouped_amount.describe()
# Each group is a Series, so the Series method is the one to apply; the result
# is already a Series indexed like `stats`, so no to_frame() is needed.
stats['kurt'] = grouped_amount.apply(pd.Series.kurt)
# The column is already selected above; sum() takes no column-name argument
# (its first positional parameter is numeric_only).
stats['sum'] = grouped_amount.sum()
stats = stats.reset_index()
stats.head(5)
| Proveedor | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | "1 JUZGADO POLICIA LOCAL MAIPU " | 2022-10 | 3.000000000 | 317840.000000000 | 191571.122367125 | 178785.000000000 | 208582.500000000 | 238380.000000000 | 387367.500000000 | 536355.000000000 | NaN | 953520.000000000 |
| 1 | '-- | 2022-07 | 1.000000000 | 1200.000000000 | NaN | 1200.000000000 | 1200.000000000 | 1200.000000000 | 1200.000000000 | 1200.000000000 | NaN | 1200.000000000 |
| 2 | '-- | 2022-08 | 2.000000000 | 375295.000000000 | 529347.207464061 | 990.000000000 | 188142.500000000 | 375295.000000000 | 562447.500000000 | 749600.000000000 | NaN | 750590.000000000 |
| 3 | '--- | 2022-07 | 1.000000000 | 42317.000000000 | NaN | 42317.000000000 | 42317.000000000 | 42317.000000000 | 42317.000000000 | 42317.000000000 | NaN | 42317.000000000 |
| 4 | '--- | 2022-08 | 1.000000000 | 196162.000000000 | NaN | 196162.000000000 | 196162.000000000 | 196162.000000000 | 196162.000000000 | 196162.000000000 | NaN | 196162.000000000 |
# Top 10 providers by total amount for each month (first 10 rows shown).
stats_top = (
    stats.groupby('Fecha_de_la_transacción_year_month')
    # Within every month, order the rows from largest to smallest total.
    .apply(lambda x: x.sort_values(['sum'], ascending=False))
    # Replace the index with a running row number named 'Index'.
    .assign(Index=range(len(stats)))
    .set_index('Index')
    # Keep the first 10 rows of each month, i.e. the 10 largest totals.
    .groupby('Fecha_de_la_transacción_year_month')
    .head(10)
)
stats_top.head(10)
| Proveedor | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Index | ||||||||||||
| 0 | RUTA DE LA ARAUCANIA SOCIEDAD CONCESIONARIA S.A | 2022-01 | 4.000000000 | 842300.000000000 | 617969.697207449 | 500000.000000000 | 500000.000000000 | 551400.000000000 | 893700.000000000 | 1766400.000000000 | 3.870873076 | 3369200.000000000 |
| 1 | BYF CORREDORES DE SEGUROS LTDA | 2022-01 | 22.000000000 | 149040.909090909 | 152476.807476356 | 16500.000000000 | 46950.000000000 | 86000.000000000 | 203625.000000000 | 617100.000000000 | 3.253351655 | 3278900.000000000 |
| 2 | Ruta de la Araucanía | 2022-01 | 2.000000000 | 1355800.000000000 | 1159937.963858413 | 535600.000000000 | 945700.000000000 | 1355800.000000000 | 1765900.000000000 | 2176000.000000000 | NaN | 2711600.000000000 |
| 3 | PEAJES | 2022-01 | 14.000000000 | 127977.142857143 | 97321.271643386 | 8500.000000000 | 33175.000000000 | 131870.000000000 | 191932.500000000 | 286330.000000000 | -1.156886909 | 1791680.000000000 |
| 4 | soc. y turismo cascada de las Animas | 2022-01 | 1.000000000 | 1742650.000000000 | NaN | 1742650.000000000 | 1742650.000000000 | 1742650.000000000 | 1742650.000000000 | 1742650.000000000 | NaN | 1742650.000000000 |
| 5 | REEMBOLSO PARA ASESORIAS JURIDICAS Y PROFESION... | 2022-01 | 1.000000000 | 1598711.000000000 | NaN | 1598711.000000000 | 1598711.000000000 | 1598711.000000000 | 1598711.000000000 | 1598711.000000000 | NaN | 1598711.000000000 |
| 6 | JOSE SANTOS | 2022-01 | 3.000000000 | 530343.333333333 | 396300.969760778 | 73780.000000000 | 402815.000000000 | 731850.000000000 | 758625.000000000 | 785400.000000000 | NaN | 1591030.000000000 |
| 7 | COPEC | 2022-01 | 57.000000000 | 27576.210526316 | 32635.294681053 | 2000.000000000 | 9400.000000000 | 20000.000000000 | 37004.000000000 | 192180.000000000 | 14.739997056 | 1571844.000000000 |
| 8 | VIATICOS | 2022-01 | 2.000000000 | 780129.000000000 | 1017236.744347155 | 60834.000000000 | 420481.500000000 | 780129.000000000 | 1139776.500000000 | 1499424.000000000 | NaN | 1560258.000000000 |
| 9 | BCI SEGUROS GENERALES | 2022-01 | 1.000000000 | 996946.000000000 | NaN | 996946.000000000 | 996946.000000000 | 996946.000000000 | 996946.000000000 | 996946.000000000 | NaN | 996946.000000000 |
# Top providers by total amount, one panel per month.
# Each bar is labelled "<provider> - <month>"; the reversed label list is used
# as the category order of the axis.
categoria_labels = stats_top['Proveedor '] + ' - ' + stats_top['Fecha_de_la_transacción_year_month'].astype('string')
categoria_list = categoria_labels.tolist()[::-1]
categoria_cat = pd.Categorical(categoria_labels, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
(
    ggplot(stats_top_aux, aes(y='sum', x = 'categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Proveedor')
    + ylab('Monto(CLP)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139879355323)>
# Number of months in which each provider appears in the monthly top,
# ordered from most to least frequent.
categoria_count = (
    stats_top.groupby('Proveedor ')['Proveedor ']
    .count()
    .sort_values(ascending=False)
)
categoria_count.to_frame(name='Meses en top').reset_index()
| Proveedor | Meses en top | |
|---|---|---|
| 0 | PEAJES | 10 |
| 1 | COPEC | 9 |
| 2 | ruta de la araucania soc. concesionaria s.a | 6 |
| 3 | Ruta del bosque sociedad concesionaria s.a | 6 |
| 4 | VIATICOS | 4 |
| 5 | JOSE SANTOS | 4 |
| 6 | Copec | 4 |
| 7 | MUNICIPALIDAD DE QUILICURA | 3 |
| 8 | I.M.QUILICURA | 3 |
| 9 | DANTE ANDRES AMIGO MOLINA | 3 |
| 10 | Veolia | 2 |
| 11 | Terminal Portuario De Valparaiso S.A. | 2 |
| 12 | JOSE LUIS SANTOS GONZALEZ | 2 |
| 13 | soc. y turismo cascada de las Animas | 2 |
| 14 | Esmax Red Limitada | 1 |
| 15 | GLADYS FABIOLA ILUFI | 1 |
| 16 | REEMBOLSO PARA ASESORIAS JURIDICAS Y PROFESION... | 1 |
| 17 | RUTA DE LA ARAUCANIA SOCIEDAD CONCESIONARIA S.A | 1 |
| 18 | Ruta de la Araucanía | 1 |
| 19 | Ruta del Bosque | 1 |
| 20 | CARTONI | 1 |
| 21 | SERVICIO AGRICOLA GANADERO | 1 |
| 22 | SERVICIOS GASTRONOMICOS ESTER | 1 |
| 23 | TERMINAL CERROS DE VALPARAISO SA | 1 |
| 24 | TERMINAL PORTUARIO DE VALPARAISO | 1 |
| 25 | Bci | 1 |
| 26 | Transporte chamorro e hijos spa | 1 |
| 27 | BYF CORREDORES DE SEGUROS LTDA | 1 |
| 28 | BCI SEGUROS GENERALES | 1 |
| 29 | W Boston Hotel | 1 |
| 30 | ATTON VITACURA SPA | 1 |
| 31 | ruta del bosque soc. concesionaria s.a | 1 |
| 32 | ruta del bosque sociedad consecionaria s.a | 1 |
| 33 | REEMBOLSO PARA ASESORIAS JURIDICAS Y PROFESION... | 1 |
| 34 | COMERCIAL Y SERVICIOS G Y F LTDA. | 1 |
| 35 | PATENTE MUNICIPAL | 1 |
| 36 | JOSÉ LUIS SANTOS GONZÁLEZ | 1 |
| 37 | I.M.QUILICDURA | 1 |
| 38 | Eco Portuaria de Residuos SPA | 1 |
| 39 | I.MUNICIPALIDAD QUILICURA CC 112 | 1 |
| 40 | I.MUNICIPALIDAD QUILICURA CC 130 | 1 |
| 41 | Ilustre Municipalidad de Requinoa | 1 |
| 42 | Cartoni Vehiculos Industriales Limitada | 1 |
| 43 | CURIFOR | 1 |
| 44 | José Santos González | 1 |
| 45 | Notario Santiago Andres Felipe Rieutord Alvarado | 1 |
| 46 | MAESTRANZA Y PROYECTOS DE INGENIERIA Y MONTAJE... | 1 |
| 47 | MUNICIPALIDAD D QUILICURA | 1 |
| 48 | 1 Y 2 JUZGADO | 1 |
| 49 | MUNICIPALIDAD DE QUILICURA CC 100 | 1 |
| 50 | MUNICIPALIDAD DE SAN FELIPE | 1 |
| 51 | MUNICIPALIDAD QUILICURA | 1 |
| 52 | Ministerio de Salud | 1 |
| 53 | 1 JUZGADO POLICIA LOCAL MAIPU | 1 |
# Providers that recur in the monthly top: keep those that appear in the top
# in at least 2 months.
n_month = 2  # criterion: in the top at least 2 months
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2  # alternative criterion: at least half of the months
#n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())  # alternative criterion: in the top every month
top_proveedores = categoria_count[categoria_count >= n_month].index.to_list()
# .copy() makes the filtered frame independent of df, so the column assignment
# below does not raise SettingWithCopyWarning.
df_top_categorias = df[df['Proveedor '].isin(top_proveedores)].copy()
df_top_categorias['Proveedor '] = pd.Categorical(df_top_categorias['Proveedor '], categories=top_proveedores)
# Total amount per month and provider. stat_summary with np.sum adds up every
# record; stat='sum' only counts overlapping points, so on raw records it
# stacked each distinct amount once and left repeated amounts out of the total.
(
    ggplot(df_top_categorias, aes(y='Cantidad ', x = 'Fecha_de_la_transacción_year_month '))
    + stat_summary(fun_y=np.sum, geom='bar', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto(CLP)')
    + facet_wrap('Proveedor ', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139753682386)>
# Descriptive statistics of the amount per provider and month, restricted to
# the rows kept in df_top_categorias (first 5 rows shown).
grouped_amount = df_top_categorias.groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])['Cantidad ']
stats = grouped_amount.describe()
# Each group is a Series, so the Series method is the one to apply; the result
# is already a Series indexed like `stats`, so no to_frame() is needed.
stats['kurt'] = grouped_amount.apply(pd.Series.kurt)
# The column is already selected above; sum() takes no column-name argument
# (its first positional parameter is numeric_only).
stats['sum'] = grouped_amount.sum()
stats = stats.reset_index()
stats.head(5)
| Proveedor | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | PEAJES | 2022-01 | 14.000000000 | 127977.142857143 | 97321.271643386 | 8500.000000000 | 33175.000000000 | 131870.000000000 | 191932.500000000 | 286330.000000000 | -1.156886909 | 1791680.000000000 |
| 1 | PEAJES | 2022-02 | 26.000000000 | 93268.846153846 | 107457.669594196 | 4300.000000000 | 24655.000000000 | 38070.000000000 | 128475.000000000 | 403350.000000000 | 2.148855632 | 2424990.000000000 |
| 2 | PEAJES | 2022-03 | 43.000000000 | 109150.697674419 | 70188.884935930 | 6200.000000000 | 70400.000000000 | 99950.000000000 | 134025.000000000 | 351800.000000000 | 2.425672815 | 4693480.000000000 |
| 3 | PEAJES | 2022-04 | 36.000000000 | 108940.166666667 | 72995.463198945 | 3750.000000000 | 60204.500000000 | 103975.000000000 | 143326.000000000 | 269600.000000000 | -0.111510536 | 3921846.000000000 |
| 4 | PEAJES | 2022-05 | 55.000000000 | 75799.345454545 | 73350.073709529 | 6450.000000000 | 21610.000000000 | 51250.000000000 | 99580.000000000 | 372400.000000000 | 6.027163088 | 4168964.000000000 |
# Total amount per month, one panel per provider.
(
    ggplot(stats, aes(y='sum', x = 'Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139872382883)>
# Plot: total amount per provider per month, with bars coloured by the
# number of records (top providers month by month).
(
    ggplot(
        stats,
        aes(y='sum', x='Fecha_de_la_transacción_year_month ', fill='count'),
    )
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139863332555)>
# Plot: number of records per month per provider (top providers month by
# month), one facet per provider.
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139751265695)>
# Plot: number of records per month per provider, with bars coloured by the
# total amount (top providers month by month).
(
    ggplot(
        stats,
        aes(y='count', x='Fecha_de_la_transacción_year_month ', fill='sum'),
    )
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° registros')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Monto (CLP)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139872343860)>
# Box plot: distribution of the amount per month, one facet per provider.
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139741146362)>
# Violin plot: distribution of the amount per month, one facet per provider.
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139747279014)>
# Median vs. mean amount per provider and month.
# Reshape the 'mean' and '50%' columns into long format (one row per
# month/provider/statistic) so both can be drawn as dodged bars.
mean_median = stats[
    ['Fecha_de_la_transacción_year_month', 'Proveedor ', 'mean', '50%']
].rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
stats_melt = mean_median.melt(
    id_vars=['Fecha_de_la_transacción_year_month', 'Proveedor '],
    value_vars=['Promedio', 'Mediana'],
)
(
    ggplot(
        stats_melt,
        aes(y='value', x='Fecha_de_la_transacción_year_month ', fill='variable'),
    )
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139751251788)>
Tiempo total
# Summary statistics of the time elapsed between consecutive records
# (transaction dates sorted ascending, then differenced).
transaction_dates = df['Fecha de la transacción '].sort_values()
transaction_dates.diff().describe().to_frame()
| Fecha de la transacción | |
|---|---|
| count | 17934 |
| mean | 0 days 00:24:19.752425560 |
| std | 0 days 03:06:12.787722180 |
| min | 0 days 00:00:00 |
| 25% | 0 days 00:00:00 |
| 50% | 0 days 00:00:00 |
| 75% | 0 days 00:00:00 |
| max | 2 days 00:00:00 |
# Summary statistics of the delta between approval date and transaction
# date, grouped by transaction month.
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month'])[
        'delta_Fecha_aprobación_transacción'
    ]
    .describe()
    .reset_index()
)
stats
| Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-01 | 1555 | 18 days 21:10:57.197054662 | 24 days 19:34:25.215532252 | 0 days 12:50:17.203000 | 3 days 22:16:23.328500 | 9 days 14:58:33.800000 | 22 days 14:53:28.678500 | 228 days 21:13:48.960000 |
| 1 | 2022-02 | 1456 | 19 days 11:19:40.332569368 | 29 days 02:34:24.933876799 | 0 days 14:02:14.193000 | 3 days 14:27:01.956000 | 7 days 16:01:03.323500 | 21 days 13:11:15.115000 | 270 days 16:53:33.807000 |
| 2 | 2022-03 | 1940 | 17 days 10:52:43.102945360 | 24 days 11:28:06.874882260 | -1 days +21:52:47.503000 | 3 days 00:08:24.157500 | 7 days 22:17:33.720000 | 20 days 11:44:42.670500 | 198 days 22:08:33.350000 |
| 3 | 2022-04 | 1769 | 15 days 00:54:28.449754663 | 19 days 07:15:59.656174233 | -1 days +23:42:33.843000 | 3 days 15:45:14.700000 | 7 days 17:04:33.027000 | 19 days 16:23:36.283000 | 186 days 23:14:41.357000 |
| 4 | 2022-05 | 1872 | 14 days 08:10:16.572301816 | 19 days 19:42:17.009894342 | 0 days 14:13:59.123000 | 2 days 19:54:54.715000 | 6 days 18:28:19.267000 | 16 days 17:45:45.241000 | 162 days 19:47:10.223000 |
| 5 | 2022-06 | 1819 | 14 days 18:42:25.959337548 | 18 days 05:20:42.682629312 | 0 days 14:19:43.453000 | 3 days 21:14:01.320000 | 7 days 18:32:58.383000 | 17 days 14:59:20.353000 | 145 days 15:10:20.287000 |
| 6 | 2022-07 | 1741 | 14 days 08:23:38.146231476 | 16 days 11:17:39.340641971 | 0 days 08:42:10.987000 | 3 days 20:48:04.517000 | 7 days 18:54:53.960000 | 19 days 15:59:42.053000 | 119 days 21:01:24.317000 |
| 7 | 2022-08 | 2074 | 13 days 05:54:44.148643201 | 16 days 05:42:17.130599143 | 0 days 15:04:06.823000 | 3 days 15:04:10.416750 | 6 days 21:00:56.403500 | 14 days 18:29:41.061750 | 89 days 15:05:07.150000 |
| 8 | 2022-09 | 1903 | 11 days 19:54:49.191524960 | 11 days 09:30:28.702765606 | 0 days 16:03:17.837000 | 3 days 23:59:29.065000 | 7 days 13:56:15.163000 | 14 days 20:53:30.908500 | 67 days 15:42:43.480000 |
| 9 | 2022-10 | 1726 | 7 days 17:38:24.388206836 | 6 days 04:05:16.539321157 | 0 days 13:50:08.760000 | 3 days 00:02:42.713250 | 6 days 01:05:43.180000 | 10 days 16:54:13.847750 | 35 days 19:17:31.800000 |
# Median delta (in whole days) between approval date and transaction date,
# per month, drawn as horizontal bars.
stats = (
    df.groupby(['Fecha_de_la_transacción_year_month'])[
        'delta_Fecha_aprobación_transacción_day_int'
    ]
    .describe()
    .reset_index()
)
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%'))
    + geom_bar(stat='sum', fill='#0C475B')
    + coord_flip()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    + theme(legend_position="none")
)
<ggplot: (139752120146)>
# Median approval-vs-transaction delta per month, with bars coloured by the
# number of records.
(
    ggplot(
        stats,
        aes(x='Fecha_de_la_transacción_year_month', y='50%', fill='count'),
    )
    + geom_bar(stat='sum')
    + coord_flip()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + labs(fill="N° registros")
)
<ggplot: (139868888068)>
# Number of records per month, with bars coloured by the median
# approval-vs-transaction delta.
(
    ggplot(
        stats,
        aes(x='Fecha_de_la_transacción_year_month', y='count', fill='50%'),
    )
    + geom_bar(stat='sum')
    + coord_flip()
    + xlab('Mes')
    + ylab('N° de registros')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + labs(fill="Tiempo (dias)")
)
<ggplot: (139872368223)>
# Box plot: approval-vs-transaction delta (days) per month.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int ',
        ),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    + theme(
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 80 rows containing non-finite values.
<ggplot: (139866649006)>
# Violin plot: approval-vs-transaction delta (days) per month.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int ',
        ),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    + theme(
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 80 rows containing non-finite values.
<ggplot: (139869294271)>
# Median vs. mean approval-vs-transaction delta (days) per month.
# NOTE: this cell is not split by provider; it uses the overall per-month
# statistics held in `stats`.
mean_median = stats[
    ['Fecha_de_la_transacción_year_month', 'mean', '50%']
].rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
stats_melt = mean_median.melt(
    id_vars=['Fecha_de_la_transacción_year_month'],
    value_vars=['Promedio', 'Mediana'],
)
(
    ggplot(
        stats_melt,
        aes(y='value', x='Fecha_de_la_transacción_year_month ', fill='variable'),
    )
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
<ggplot: (139741668860)>
Tiempo por tipo de documento
# Summary statistics of the approval-vs-transaction delta per document type
# and month (first 10 rows shown).
stats = (
    df.groupby(['Tipo Documento', 'Fecha_de_la_transacción_year_month'])[
        'delta_Fecha_aprobación_transacción'
    ]
    .describe()
    .reset_index()
)
stats.head(10)
| Tipo Documento | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | CHL - Boleta | 2022-03 | 1 | 188 days 13:26:38.007000 | NaT | 188 days 13:26:38.007000 | 188 days 13:26:38.007000 | 188 days 13:26:38.007000 | 188 days 13:26:38.007000 | 188 days 13:26:38.007000 |
| 1 | CHL - Boleta | 2022-04 | 1 | 186 days 23:14:41.357000 | NaT | 186 days 23:14:41.357000 | 186 days 23:14:41.357000 | 186 days 23:14:41.357000 | 186 days 23:14:41.357000 | 186 days 23:14:41.357000 |
| 2 | CHL - Boleta | 2022-05 | 1 | 162 days 19:47:10.223000 | NaT | 162 days 19:47:10.223000 | 162 days 19:47:10.223000 | 162 days 19:47:10.223000 | 162 days 19:47:10.223000 | 162 days 19:47:10.223000 |
| 3 | CHL - Boleta | 2022-06 | 4 | 97 days 13:44:48.523500 | 16 days 15:46:46.064714821 | 88 days 13:51:18.990000 | 89 days 07:50:53.685000 | 89 days 13:58:45.588500 | 97 days 19:52:40.427000 | 122 days 13:10:23.927000 |
| 4 | CHL - Boleta | 2022-07 | 20 | 87 days 10:32:07.775200 | 12 days 15:26:21.387318070 | 70 days 13:39:14.393000 | 81 days 03:20:17.999750 | 82 days 03:20:44.356500 | 86 days 06:28:15.724000 | 116 days 19:39:16.480000 |
| 5 | CHL - Boleta | 2022-08 | 29 | 54 days 14:06:52.436586207 | 13 days 02:09:39.611182136 | 28 days 19:44:45.490000 | 48 days 03:20:44.307000 | 54 days 03:20:44.457000 | 64 days 13:39:14.380000 | 85 days 20:18:29.310000 |
| 6 | CHL - Boleta | 2022-09 | 135 | 19 days 06:48:08.177185185 | 14 days 18:57:52.318220474 | 1 days 16:46:56.587000 | 5 days 23:00:51.081500 | 14 days 13:50:08.770000 | 30 days 16:38:54.052000 | 62 days 12:58:06.420000 |
| 7 | CHL - Boleta | 2022-10 | 320 | 7 days 11:46:46.143328125 | 5 days 07:25:03.620069586 | 0 days 17:48:29.817000 | 3 days 18:09:26.590000 | 6 days 13:22:39.926500 | 9 days 17:43:27.669750 | 29 days 13:27:32.040000 |
| 8 | CHL - Boleta de Honorario | 2022-09 | 6 | 17 days 19:04:37.852666666 | 16 days 06:12:07.804726302 | 4 days 13:13:44.107000 | 8 days 19:13:41.348000 | 14 days 06:21:19.053000 | 16 days 10:14:52.883500 | 49 days 13:27:31.963000 |
| 9 | CHL - Boleta de Honorario | 2022-10 | 10 | 10 days 16:07:58.081000 | 7 days 07:50:11.388938659 | 1 days 16:25:19.957000 | 6 days 02:57:53.589500 | 9 days 02:23:45.365000 | 12 days 11:45:41.438250 | 24 days 21:43:49.493000 |
# Median approval-vs-transaction delta (days) per month, one facet per
# document type.
stats = (
    df.groupby(['Tipo Documento', 'Fecha_de_la_transacción_year_month'])[
        'delta_Fecha_aprobación_transacción_day_int'
    ]
    .describe()
    .reset_index()
)
(
    ggplot(stats, aes(y='50%', x='Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139872950762)>
# Median approval-vs-transaction delta per month and document type, with
# bars coloured by the number of records.
(
    ggplot(
        stats,
        aes(y='50%', x='Fecha_de_la_transacción_year_month ', fill='count'),
    )
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° Registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139752930154)>
# Number of records per month and document type, with bars coloured by the
# median approval-vs-transaction delta.
(
    ggplot(
        stats,
        aes(y='count', x='Fecha_de_la_transacción_year_month ', fill='50%'),
    )
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139872930621)>
# Point plot: number of records (also mapped to point size) and median
# approval-vs-transaction delta (fill) per month and document type.
(
    ggplot(
        stats,
        aes(x='count ', y='Fecha_de_la_transacción_year_month', fill='50%'),
    )
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139752887333)>
# Box plot: approval-vs-transaction delta (days) per month, one facet per
# document type.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139865065067)>
# Violin plot: approval-vs-transaction delta (days) per month, one facet per
# document type. (The plotted variable is the time delta, not the amount.)
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
        ),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139861959762)>
# Median vs. mean approval-vs-transaction delta (days) per month and
# document type.
# Reshape 'mean' and '50%' into long format so both can be drawn as dodged
# bars within each facet.
mean_median = stats[
    ['Fecha_de_la_transacción_year_month', 'Tipo Documento', 'mean', '50%']
].rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
stats_melt = mean_median.melt(
    id_vars=['Fecha_de_la_transacción_year_month', 'Tipo Documento'],
    value_vars=['Promedio', 'Mediana'],
)
(
    ggplot(
        stats_melt,
        aes(y='value', x='Fecha_de_la_transacción_year_month ', fill='variable'),
    )
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)  # suppress the size legend
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139861614091)>
Tiempo por Fondo fijo / Anticipo / Reembolso
# Descriptive statistics of the approval-minus-transaction delta (as timedeltas),
# grouped by 'Fondo fijo' and transaction month; the first 10 rows are displayed.
stats = (
    df.groupby(['Fondo fijo', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(10)
| Fondo fijo | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ANTICIPO (VxR) | 2022-01 | 57 | 42 days 12:18:29.786263158 | 42 days 07:01:22.614423353 | 0 days 19:58:53.307000 | 3 days 21:26:10.023000 | 21 days 15:24:56.447000 | 86 days 18:30:49.597000 | 112 days 18:31:03.363000 |
| 1 | ANTICIPO (VxR) | 2022-02 | 48 | 45 days 11:53:03.967416666 | 30 days 20:19:12.781808137 | 0 days 16:21:41.257000 | 5 days 15:59:18.485250 | 61 days 18:30:50.125000 | 69 days 02:38:27.969500 | 83 days 17:37:37.997000 |
| 2 | ANTICIPO (VxR) | 2022-03 | 165 | 19 days 08:35:53.049606060 | 20 days 23:44:49.244160100 | 0 days 16:48:36.133000 | 1 days 22:51:14.913000 | 12 days 19:15:32.710000 | 28 days 19:59:26.770000 | 126 days 23:37:35.993000 |
| 3 | ANTICIPO (VxR) | 2022-04 | 130 | 10 days 08:35:51.068430769 | 15 days 04:55:28.155007794 | 0 days 17:05:40.427000 | 1 days 23:31:45.774000 | 4 days 00:29:43.963000 | 12 days 08:55:05.496000 | 106 days 17:00:23.360000 |
| 4 | ANTICIPO (VxR) | 2022-05 | 96 | 7 days 00:14:06.055718750 | 16 days 05:25:24.551002415 | 0 days 18:51:46.613000 | 0 days 23:21:46.061750 | 1 days 15:59:55.620000 | 3 days 16:07:03.498000 | 85 days 17:03:57.133000 |
| 5 | ANTICIPO (VxR) | 2022-06 | 120 | 17 days 13:09:39.805350 | 20 days 22:27:19.819347349 | 0 days 15:59:27.717000 | 3 days 19:09:04.298500 | 7 days 17:34:00.488500 | 21 days 21:40:16.892000 | 80 days 22:47:48.973000 |
| 6 | ANTICIPO (VxR) | 2022-07 | 57 | 19 days 11:55:20.740631579 | 23 days 19:53:50.990367748 | 0 days 15:40:20.123000 | 4 days 17:22:37.470000 | 9 days 17:00:17.203000 | 21 days 21:01:59.157000 | 101 days 14:11:19.690000 |
| 7 | ANTICIPO (VxR) | 2022-08 | 90 | 5 days 15:06:44.713022222 | 8 days 01:50:56.447153996 | 0 days 16:59:50.297000 | 1 days 00:05:36.224250 | 3 days 14:21:31.158500 | 8 days 17:02:45.045500 | 53 days 13:37:09.197000 |
| 8 | ANTICIPO (VxR) | 2022-09 | 36 | 9 days 02:19:57.194500 | 6 days 20:50:40.054437503 | 0 days 21:26:11.893000 | 3 days 11:49:34.940000 | 7 days 10:49:34.060000 | 14 days 18:35:37.204250 | 30 days 02:29:38.807000 |
| 9 | ANTICIPO (VxR) | 2022-10 | 108 | 4 days 10:10:08.426481481 | 5 days 11:13:45.387557375 | 0 days 15:00:15.863000 | 1 days 16:37:23.112000 | 2 days 14:20:40.833000 | 5 days 14:52:20.294750 | 34 days 19:52:04.457000 |
# Median ('50%') approval-minus-transaction delta in whole days, per month,
# one facet per 'Fondo fijo' value. `stats` is recomputed on the integer-day column.
stats = (
    df.groupby(['Fondo fijo', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
(
    ggplot(stats, aes(y='50%', x='Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139877711032)>
# Median delta (y) per month with bars filled by the number of records,
# one facet per 'Fondo fijo' value.
(
    ggplot(stats, aes(y='50%', x='Fecha_de_la_transacción_year_month ', fill='count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° Registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139879453597)>
# Number of records (y) per month with bars filled by the median delta,
# one facet per 'Fondo fijo' value.
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month ', fill='50%'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139879534406)>
# Record volume vs. month as points: point size follows the record count and
# fill follows the median delta; one facet per 'Fondo fijo' value, axes flipped.
(
    ggplot(stats, aes(x='count ', y='Fecha_de_la_transacción_year_month', fill='50%'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139879545160)>
# Box plot of the approval-minus-transaction delta (whole days) per month,
# one facet per 'Fondo fijo' value; built from the row-level frame `df`.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139879976036)>
# Violin plot of the approval-minus-transaction delta (whole days) per month,
# one facet per 'Fondo fijo' value; built from the row-level frame `df`.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139880385786)>
# Median vs. mean of the approval-minus-transaction delta, per 'Fondo fijo' value and month.
# `stats` is reshaped to long form (one row per statistic) so both can be drawn as dodged bars.
stats_melt = (
    stats[['Fecha_de_la_transacción_year_month', 'Fondo fijo', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
    .melt(
        id_vars=['Fecha_de_la_transacción_year_month', 'Fondo fijo'],
        value_vars=['Promedio', 'Mediana'],
    )
)
(
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month ', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fondo fijo', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139878471374)>
Tiempo por Sucursal
# Descriptive statistics of the approval-minus-transaction delta (as timedeltas),
# grouped by branch ('Sucursal ') and transaction month; the first 10 rows are displayed.
stats = (
    df.groupby(['Sucursal ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(10)
| Sucursal | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-01 | 28 | 57 days 23:38:26.290428571 | 13 days 17:45:14.510641479 | 43 days 12:46:05.287000 | 50 days 18:54:36.242500 | 57 days 12:46:03.315000 | 59 days 19:45:46.077000 | 117 days 13:50:32.827000 |
| 1 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-02 | 19 | 39 days 10:27:45.428736842 | 29 days 16:27:48.329952471 | 2 days 13:47:50.523000 | 7 days 13:47:43.631500 | 35 days 17:55:44.387000 | 54 days 17:55:44.350000 | 102 days 13:50:32.863000 |
| 2 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-03 | 30 | 37 days 06:00:26.199200 | 31 days 05:24:19.004548532 | 5 days 14:21:29.950000 | 28 days 14:58:06.312500 | 35 days 17:28:04.481500 | 42 days 07:57:52.253500 | 188 days 14:48:12.293000 |
| 3 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-04 | 35 | 33 days 12:03:53.701628571 | 46 days 07:24:17.980019026 | 0 days 19:57:52.253000 | 5 days 02:48:55.950000 | 22 days 13:50:32.833000 | 30 days 13:50:32.847000 | 154 days 17:58:06.410000 |
| 4 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-05 | 43 | 22 days 04:23:36.475837209 | 25 days 07:03:21.698097645 | 1 days 13:50:32.893000 | 7 days 17:33:32.812000 | 17 days 13:50:04.303000 | 22 days 21:23:48.495000 | 130 days 17:59:06.610000 |
| 5 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-06 | 31 | 32 days 11:21:37.081516129 | 21 days 00:09:50.005022663 | 0 days 21:17:01.363000 | 6 days 15:04:31.781500 | 41 days 14:03:13.327000 | 43 days 02:03:13.336500 | 94 days 22:38:25.903000 |
| 6 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-07 | 34 | 31 days 02:06:18.513176470 | 14 days 09:59:56.946721544 | 6 days 14:57:48.287000 | 21 days 20:57:48.192500 | 30 days 14:40:26.686500 | 43 days 02:40:26.651750 | 76 days 17:57:27.143000 |
| 7 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-08 | 26 | 35 days 15:54:04.329884615 | 15 days 18:30:51.527351213 | 3 days 14:40:26.867000 | 26 days 04:38:40.417000 | 33 days 23:00:41.316500 | 43 days 10:37:32.876750 | 85 days 20:18:29.310000 |
| 8 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-09 | 45 | 20 days 22:08:18.546200 | 12 days 12:21:56.435051992 | 1 days 13:26:34.960000 | 12 days 22:38:26.290000 | 19 days 18:38:10.653000 | 31 days 13:51:58.933000 | 47 days 14:39:21.507000 |
| 9 | GESTION INTEGRAL DE RESIDUOS SPA | 2022-10 | 43 | 8 days 22:41:21.350465116 | 5 days 21:36:42.481133790 | 1 days 00:13:23.707000 | 4 days 06:55:24.408500 | 7 days 13:21:03.310000 | 14 days 01:52:34.205000 | 21 days 15:32:55.823000 |
# Median ('50%') approval-minus-transaction delta in whole days, per month,
# one facet per branch. `stats` is recomputed on the integer-day column.
stats = (
    df.groupby(['Sucursal ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
(
    ggplot(stats, aes(y='50%', x='Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139751054430)>
# Median delta (y) per month with bars filled by the number of records,
# one facet per branch.
(
    ggplot(stats, aes(y='50%', x='Fecha_de_la_transacción_year_month ', fill='count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° Registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139869304430)>
# Number of records (y) per month with bars filled by the median delta,
# one facet per branch.
(
    ggplot(stats, aes(y='count', x='Fecha_de_la_transacción_year_month ', fill='50%'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139855331289)>
# Record volume vs. month as points: point size follows the record count and
# fill follows the median delta; one facet per branch, axes flipped.
(
    ggplot(stats, aes(x='count ', y='Fecha_de_la_transacción_year_month', fill='50%'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)", size='N° registros')
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139741147395)>
# Box plot of the approval-minus-transaction delta (whole days) per month,
# one facet per branch; built from the row-level frame `df`.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'))
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139869284007)>
# Violin plot of the approval-minus-transaction delta (whole days) per month,
# one facet per branch; built from the row-level frame `df`.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'))
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 80 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139869929689)>
# Median vs. mean of the approval-minus-transaction delta, per branch and month.
# `stats` is reshaped to long form (one row per statistic) so both can be drawn as dodged bars.
stats_melt = (
    stats[['Fecha_de_la_transacción_year_month', 'Sucursal ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'})
    .melt(
        id_vars=['Fecha_de_la_transacción_year_month', 'Sucursal '],
        value_vars=['Promedio', 'Mediana'],
    )
)
(
    ggplot(stats_melt, aes(y='value', x='Fecha_de_la_transacción_year_month ', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139880470312)>
Tiempo por Categoria
# Descriptive statistics of the approval-minus-transaction delta (as timedeltas),
# grouped by category ('Categoría ') and transaction month; the first 5 rows are displayed.
stats = (
    df.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
| Categoría | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ADITIVO BLUE MAX | 2022-03 | 1 | 29 days 15:55:12.203000 | NaT | 29 days 15:55:12.203000 | 29 days 15:55:12.203000 | 29 days 15:55:12.203000 | 29 days 15:55:12.203000 | 29 days 15:55:12.203000 |
| 1 | ADITIVO BLUE MAX | 2022-04 | 2 | 2 days 06:52:34.051500 | 2 days 07:15:24.083663809 | 0 days 15:48:13.483000 | 1 days 11:20:23.767250 | 2 days 06:52:34.051500 | 3 days 02:24:44.335750 | 3 days 21:56:54.620000 |
| 2 | ADITIVO BLUE MAX | 2022-06 | 1 | 6 days 18:35:34.717000 | NaT | 6 days 18:35:34.717000 | 6 days 18:35:34.717000 | 6 days 18:35:34.717000 | 6 days 18:35:34.717000 | 6 days 18:35:34.717000 |
| 3 | ADITIVO BLUE MAX | 2022-10 | 1 | 1 days 21:33:58.767000 | NaT | 1 days 21:33:58.767000 | 1 days 21:33:58.767000 | 1 days 21:33:58.767000 | 1 days 21:33:58.767000 | 1 days 21:33:58.767000 |
| 4 | Abogados | 2022-05 | 1 | 19 days 20:20:31.667000 | NaT | 19 days 20:20:31.667000 | 19 days 20:20:31.667000 | 19 days 20:20:31.667000 | 19 days 20:20:31.667000 | 19 days 20:20:31.667000 |
# Top 10 categories per month, ranked by the median ('50%') approval-minus-transaction
# delta in descending order; the first 10 rows of the result are displayed.
# A single two-key sort (month ascending, median descending) replaces the previous
# groupby(...).apply(sort_values) step, which relied on the grouping column being
# passed through apply (deprecated in recent pandas releases).
stats_top = (
    stats.sort_values(['Fecha_de_la_transacción_year_month', '50%'], ascending=[True, False])
    # Replace the index with the row position in the fully sorted frame, named 'Index'.
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby('Fecha_de_la_transacción_year_month')
    .head(10)
)
stats_top.head(10)
| Categoría | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| Index | ||||||||||
| 0 | Repuestos equipos y maquinaria | 2022-01 | 1 | 63 days 15:16:47.693000 | NaT | 63 days 15:16:47.693000 | 63 days 15:16:47.693000 | 63 days 15:16:47.693000 | 63 days 15:16:47.693000 | 63 days 15:16:47.693000 |
| 1 | Materiales seguridad (Lentes, mascara, chaleco... | 2022-01 | 2 | 49 days 01:50:07.913500 | 59 days 12:28:55.021935637 | 6 days 23:44:46.527000 | 28 days 00:47:27.220250 | 49 days 01:50:07.913500 | 70 days 02:52:48.606750 | 91 days 03:55:29.300000 |
| 2 | Gastos de formacion y capacitaciones | 2022-01 | 2 | 46 days 02:14:02.323500 | 20 days 11:59:50.033559569 | 31 days 14:20:21.840000 | 38 days 20:17:12.081750 | 46 days 02:14:02.323500 | 53 days 08:10:52.565250 | 60 days 14:07:42.807000 |
| 3 | Resto de coste indirecto - Multas | 2022-01 | 3 | 53 days 19:46:11.259000 | 33 days 02:43:15.739928576 | 32 days 14:20:48.307000 | 34 days 17:54:14.232000 | 36 days 21:27:40.157000 | 64 days 10:28:52.735000 | 91 days 23:30:05.313000 |
| 4 | Alquiler estacionamiento | 2022-01 | 6 | 42 days 20:08:46.656666666 | 39 days 13:24:02.406469751 | 5 days 16:30:34.813000 | 10 days 10:44:07.576500 | 31 days 01:57:42.697000 | 78 days 04:23:38.434250 | 91 days 17:34:46.850000 |
| 5 | Transporte | 2022-01 | 19 | 31 days 07:28:13.975789473 | 19 days 02:23:50.853082166 | 3 days 17:48:35.467000 | 20 days 03:15:52.956500 | 26 days 15:15:52.943000 | 44 days 18:13:38.367000 | 63 days 20:00:17.633000 |
| 6 | Peajes / estacionamiento | 2022-01 | 7 | 67 days 15:56:16.963142857 | 81 days 10:43:58.391147449 | 4 days 15:35:56.663000 | 10 days 14:12:17.493000 | 26 days 15:15:52.873000 | 96 days 03:31:52.630000 | 228 days 21:13:48.960000 |
| 7 | Peajes (Estructura) | 2022-01 | 71 | 35 days 03:24:32.117225352 | 34 days 06:55:34.085707294 | 1 days 15:02:04.650000 | 13 days 15:20:22.148000 | 26 days 01:05:43.193000 | 35 days 15:50:08.343500 | 118 days 03:55:29.213000 |
| 8 | Resto de coste indirec-Atencion y Aportes Social | 2022-01 | 4 | 37 days 06:16:25.966500 | 42 days 13:26:59.600107242 | 2 days 00:50:17.343000 | 7 days 18:55:45.878250 | 25 days 15:43:15.745000 | 55 days 03:03:55.833250 | 95 days 16:48:55.033000 |
| 9 | Otros gastos direct - Multas | 2022-01 | 4 | 23 days 01:28:03.363500 | 17 days 12:25:17.193104877 | 3 days 15:02:21.947000 | 10 days 14:01:07.067000 | 24 days 21:33:49.100000 | 37 days 09:00:45.396500 | 38 days 19:42:13.307000 |
# Top 10 categories per month by median ('50%') approval-minus-transaction delta in
# whole days, drawn as horizontal bars with one facet per month.
stats = df.groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int'].describe().reset_index()
# A single two-key sort (month ascending, median descending) replaces the previous
# groupby(...).apply(sort_values) step, which relied on the grouping column being
# passed through apply (deprecated in recent pandas releases).
stats_top = (
    stats.sort_values(['Fecha_de_la_transacción_year_month', '50%'], ascending=[True, False])
    # Replace the index with the row position in the fully sorted frame, named 'Index'.
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby('Fecha_de_la_transacción_year_month')
    .head(10)
)
# One "<category> - <month>" label per row, built once and reused below.
categoria_cat = stats_top['Categoría '] + ' - ' + stats_top['Fecha_de_la_transacción_year_month'].astype('string')
# Category order is the reversed row order — presumably so the largest median of each
# month ends up at the top of its facet after coord_flip(); confirm on the rendered plot.
categoria_list = categoria_cat.tolist()[::-1]
categoria_cat = pd.Categorical(categoria_cat, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
(
    ggplot(stats_top_aux, aes(y='50%', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Categoría')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139881653212)>
# Number of months in which each category appears in the monthly top
# (rows of `stats_top` per category), sorted from most to least frequent.
categoria_count = (
    stats_top.groupby('Categoría ')['Categoría ']
    .count()
    .sort_values(ascending=False)
)
# Displayed as a two-column table: category and its count under 'Meses en top'.
categoria_count.to_frame(name='Meses en top').reset_index()
| Categoría | Meses en top | |
|---|---|---|
| 0 | Peajes (Estructura) | 8 |
| 1 | Mantenim. Calderas y Quemadores | 7 |
| 2 | Viveres (Aarticulos de aseo) | 5 |
| 3 | Gastos de formacion y capacitaciones | 4 |
| 4 | Gastos Medicos | 4 |
| 5 | Materiales seguridad (Lentes, mascara, chaleco... | 4 |
| 6 | Guantes | 4 |
| 7 | Transporte | 4 |
| 8 | Resto de coste indirec-Atencion y Aportes Social | 3 |
| 9 | Mantencion de vehiculos | 3 |
| 10 | Peajes / estacionamiento | 3 |
| 11 | Formaciones y capacitaciones | 3 |
| 12 | Bolsas para limpieza | 3 |
| 13 | Alquiler estacionamiento | 3 |
| 14 | Repuestos equipos y maquinaria | 3 |
| 15 | Servicios Internet | 2 |
| 16 | Seguridad y Salud Ocupacional | 2 |
| 17 | Publicidad (Estructura) | 2 |
| 18 | Suscripciones y correo | 2 |
| 19 | Mantenimient Equipos oficina (TI) | 2 |
| 20 | Viveres (Articulos aseo, y despenza base) | 2 |
| 21 | Aceite Hidraulico | 2 |
| 22 | Gasoleo - Diesel y gasolina | 2 |
| 23 | Energia Electrica | 2 |
| 24 | Herramientas de taller | 1 |
| 25 | En serv. Locomocion publica | 1 |
| 26 | Aceite Motor | 1 |
| 27 | Base operaciones - Arriendo | 1 |
| 28 | Servicios de mensajeria | 1 |
| 29 | COSTO DE INTEGRACION SONATE | 1 |
| 30 | Compra de agua | 1 |
| 31 | Resto de coste indirecto - Multas | 1 |
| 32 | De instalac. Y obras (mant. instalaciones) | 1 |
| 33 | Diesel | 1 |
| 34 | Publicidad | 1 |
| 35 | Abogados | 1 |
| 36 | Gas y otros | 1 |
| 37 | Otros servicios de Asesorias | 1 |
| 38 | Otros mat. de impermeabilizacion | 1 |
| 39 | Materiales seguridad (Mascaras - lentes - guan... | 1 |
| 40 | Materiales de oficina - TI | 1 |
| 41 | Mantenimiento oficinas e instalaciones | 1 |
| 42 | Mantenimiento Vehiculos | 1 |
| 43 | Gasolina | 1 |
| 44 | ADITIVO BLUE MAX | 1 |
# Categories that recur in the monthly top: keep those that appear in the top
# in at least `n_month` months. Alternative thresholds that were tried:
#   n_month = 2
#   n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2  # at least half of the months
#   n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())      # every month
n_month = 4  # minimum number of months a category must appear in the monthly top
# Computed once: it is both the row filter and the category order for the facets.
top_categorias = categoria_count[categoria_count >= n_month].index.to_list()
# .copy() gives an independent frame, so the column assignment below does not write
# into a slice of `df` (which triggers pandas' SettingWithCopyWarning).
df_top_categorias = df[df['Categoría '].isin(top_categorias)].copy()
df_top_categorias['Categoría '] = pd.Categorical(df_top_categorias['Categoría '], categories=top_categorias)
# NOTE(review): this plot feeds row-level data (not per-month aggregates) to
# geom_bar(stat='sum') — confirm the resulting bar length is the intended measure.
(
    ggplot(df_top_categorias, aes(y='delta_Fecha_aprobación_transacción_day_int', x='Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size=False)
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_sum : Removed 5 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139883091497)>
# Descriptive statistics of the approval-minus-transaction delta (timedelta),
# per top category and month.
stats = (
    df_top_categorias
    .groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])
    ['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
| Categoría | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Peajes (Estructura) | 2022-01 | 71 | 35 days 03:24:32.117225352 | 34 days 06:55:34.085707294 | 1 days 15:02:04.650000 | 13 days 15:20:22.148000 | 26 days 01:05:43.193000 | 35 days 15:50:08.343500 | 118 days 03:55:29.213000 |
| 1 | Peajes (Estructura) | 2022-02 | 79 | 35 days 01:37:29.030240506 | 31 days 13:01:11.303943661 | 1 days 12:06:09.603000 | 13 days 20:38:08.403500 | 15 days 20:38:34.030000 | 60 days 10:25:48.743000 | 102 days 22:53:38.410000 |
| 2 | Peajes (Estructura) | 2022-03 | 47 | 38 days 05:00:27.629978723 | 21 days 19:53:06.432699643 | 3 days 14:28:38.920000 | 27 days 12:57:04.580000 | 38 days 23:16:31.800000 | 53 days 10:31:18.838000 | 92 days 19:14:05.997000 |
| 3 | Peajes (Estructura) | 2022-04 | 82 | 30 days 22:25:04.305024390 | 18 days 06:22:31.090776525 | 2 days 17:03:04.200000 | 17 days 17:52:22.337000 | 31 days 09:19:46.612000 | 41 days 22:11:49.782500 | 103 days 17:52:57.763000 |
| 4 | Peajes (Estructura) | 2022-05 | 97 | 36 days 14:45:22.203649484 | 24 days 20:34:49.770261797 | 3 days 15:33:14.493000 | 14 days 23:37:35.143000 | 39 days 03:20:38.250000 | 47 days 20:40:40.010000 | 111 days 13:43:07.960000 |
# Plot: median approval delta (whole days) per month, one panel per top category.
stats = (
    df_top_categorias
    .groupby(['Categoría ', 'Fecha_de_la_transacción_year_month'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_sum : Removed 1 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139883947063)>
# Plot: median approval delta per month and top category, bars coloured by the
# number of records in each month.
# The x aesthetic names the column exactly (no trailing space) so it is looked
# up directly in `stats`.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%', fill='count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_sum : Removed 1 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139740093879)>
# Plot: number of records per month, one panel per top category.
# The x aesthetic names the column exactly (no trailing space) so it is looked
# up directly in `stats`.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139752888158)>
# Plot: number of records per month per top category, bars coloured by the
# median approval delta (days).
# The x aesthetic names the column exactly (no trailing space) so it is looked
# up directly in `stats`.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count', fill='50%'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° Registros')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139881046434)>
# Boxplot: distribution of the approval delta (days) per month, one panel per
# top category.
(
    ggplot(
        df_top_categorias,
        aes(y='delta_Fecha_aprobación_transacción_day_int', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 5 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139880572691)>
# Violin plot: distribution of the approval delta (days) per month, one panel
# per top category.
(
    ggplot(
        df_top_categorias,
        aes(y='delta_Fecha_aprobación_transacción_day_int', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_ydensity : Removed 5 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139883736917)>
# Median vs mean of the approval delta (days) per month and top category.
# `stats` is reshaped to long format: one row per (month, category, statistic).
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'Categoría ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'}),
    id_vars=['Fecha_de_la_transacción_year_month', 'Categoría '],
    value_vars=['Promedio', 'Mediana'],
)
# The plotted values are days, so the y axis is labelled as time (it was
# previously labelled as an amount in CLP).
(
    ggplot(stats_melt, aes(x='Fecha_de_la_transacción_year_month', y='value', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Categoría ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_sum : Removed 2 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139893425873)>
Tiempo por Proveedor
# Descriptive statistics of the approval-minus-transaction delta (timedelta),
# per provider and month (first 5 rows shown).
stats = (
    df
    .groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])
    ['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
| Proveedor | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | "1 JUZGADO POLICIA LOCAL MAIPU " | 2022-10 | 2 | 2 days 14:20:40.748000 | 0 days 00:00:00.176776695 | 2 days 14:20:40.623000 | 2 days 14:20:40.685500 | 2 days 14:20:40.748000 | 2 days 14:20:40.810500 | 2 days 14:20:40.873000 |
| 1 | '-- | 2022-07 | 1 | 1 days 17:22:57.327000 | NaT | 1 days 17:22:57.327000 | 1 days 17:22:57.327000 | 1 days 17:22:57.327000 | 1 days 17:22:57.327000 | 1 days 17:22:57.327000 |
| 2 | '-- | 2022-08 | 2 | 6 days 05:50:51.938500 | 6 days 07:54:06.061709578 | 1 days 18:26:13.380000 | 4 days 00:08:32.659250 | 6 days 05:50:51.938500 | 8 days 11:33:11.217750 | 10 days 17:15:30.497000 |
| 3 | '--- | 2022-07 | 1 | 27 days 17:11:22.077000 | NaT | 27 days 17:11:22.077000 | 27 days 17:11:22.077000 | 27 days 17:11:22.077000 | 27 days 17:11:22.077000 | 27 days 17:11:22.077000 |
| 4 | '--- | 2022-08 | 1 | 1 days 15:38:36.550000 | NaT | 1 days 15:38:36.550000 | 1 days 15:38:36.550000 | 1 days 15:38:36.550000 | 1 days 15:38:36.550000 | 1 days 15:38:36.550000 |
# Top 10 providers per month ranked by the median ('50%') approval delta in
# whole days (first 5 rows shown).
stats = df.groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])['delta_Fecha_aprobación_transacción_day_int'].describe()
stats = stats.reset_index()
# Within each month, order the rows from largest to smallest median.
ordenado = stats.groupby('Fecha_de_la_transacción_year_month').apply(
    lambda mes: mes.sort_values(['50%'], ascending=False)
)
# Replace the index produced by groupby/apply with a flat running counter.
ordenado = ordenado.assign(Index=range(len(stats))).set_index('Index')
stats_top = ordenado.groupby('Fecha_de_la_transacción_year_month').head(10)
stats_top.head(5)
| Proveedor | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| Index | ||||||||||
| 0 | Ruta La Araucanía S.A. | 2022-01 | 1.000000000 | 208.000000000 | NaN | 208.000000000 | 208.000000000 | 208.000000000 | 208.000000000 | 208.000000000 |
| 1 | China King | 2022-01 | 1.000000000 | 207.000000000 | NaN | 207.000000000 | 207.000000000 | 207.000000000 | 207.000000000 | 207.000000000 |
| 2 | Claudio DOugnac | 2022-01 | 1.000000000 | 184.000000000 | NaN | 184.000000000 | 184.000000000 | 184.000000000 | 184.000000000 | 184.000000000 |
| 3 | CENCOSUD EASY | 2022-01 | 1.000000000 | 140.000000000 | NaN | 140.000000000 | 140.000000000 | 140.000000000 | 140.000000000 | 140.000000000 |
| 4 | Eco kleen car wash | 2022-01 | 1.000000000 | 117.000000000 | NaN | 117.000000000 | 117.000000000 | 117.000000000 | 117.000000000 | 117.000000000 |
# Plot: monthly top providers by median approval delta (days).
# Each bar is labelled "<provider> - <month>"; the category list is reversed so
# the row order of `stats_top` is preserved once the axes are flipped.
etiquetas = stats_top['Proveedor '] + ' - ' + stats_top['Fecha_de_la_transacción_year_month'].astype('string')
categoria_list = list(reversed(etiquetas.tolist()))
categoria_cat = pd.Categorical(etiquetas, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
(
    ggplot(stats_top_aux, aes(x='categoria_cat', y='50%'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Proveedor')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fecha_de_la_transacción_year_month', scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139892467974)>
# Count of appearances in the monthly top, per provider (most frequent first).
apariciones = stats_top.groupby('Proveedor ')['Proveedor '].count()
categoria_count = apariciones.sort_values(ascending=False)
categoria_count.to_frame(name='Meses en top').reset_index()
| Proveedor | Meses en top | |
|---|---|---|
| 0 | soc. c. de los lagos | 2 |
| 1 | ESMAX RED LIMITADA | 2 |
| 2 | Rodrigo Naranjo Alborta E.i.r.l | 1 |
| 3 | Parking time | 1 |
| 4 | Panda Oriental Ltda | 1 |
| ... | ... | ... |
| 93 | Eco Kleen Car Wash Spa | 1 |
| 94 | Eco Kleen Car Wash | 1 |
| 95 | Eco Kleen Car Hash Spa | 1 |
| 96 | ESTACIONAMIENTOS JAVIER NOVOA ORTEGA EIRL | 1 |
| 97 | La Junta Ltda. | 1 |
98 rows × 2 columns
# Providers that recur in the monthly top ranking: keep those that appear in
# the top during at least `n_month` months.
n_month = 2  # criterion: at least 2 months in the top
# Alternative criteria:
#   n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique()) / 2  # at least half of the months
#   n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())  # in the top every month
top_proveedores = categoria_count[categoria_count >= n_month].index.to_list()
# .copy() yields an independent frame, so the column assignment below does not
# write into a slice of `df` (avoids SettingWithCopyWarning).
# The name `df_top_categorias` is kept because later cells read it.
df_top_categorias = df[df['Proveedor '].isin(top_proveedores)].copy()
# Categorical whose category order follows the order of `categoria_count`.
df_top_categorias['Proveedor '] = pd.Categorical(df_top_categorias['Proveedor '], categories=top_proveedores)
# NOTE(review): stat='sum' selects plotnine's stat_sum (it counts overlapping
# points), not an arithmetic sum of days — confirm this is the intended plot.
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size=False)
    + theme(
        legend_position='none',
        panel_spacing_y=0.8,
        aspect_ratio=0.5,
        strip_text_x=element_text(size=10),
        strip_margin=-0.3,
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (139893405213)>
# Descriptive statistics of the approval delta (whole days) per top provider
# and month (first 5 rows shown).
stats = (
    df_top_categorias
    .groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month'])
    ['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
stats.head(5)
| Proveedor | Fecha_de_la_transacción_year_month | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | soc. c. de los lagos | 2022-02 | 4.000000000 | 195.750000000 | 4.991659711 | 192.000000000 | 192.750000000 | 194.000000000 | 197.000000000 | 203.000000000 |
| 1 | soc. c. de los lagos | 2022-03 | 1.000000000 | 148.000000000 | NaN | 148.000000000 | 148.000000000 | 148.000000000 | 148.000000000 | 148.000000000 |
| 2 | soc. c. de los lagos | 2022-05 | 1.000000000 | 5.000000000 | NaN | 5.000000000 | 5.000000000 | 5.000000000 | 5.000000000 | 5.000000000 |
| 3 | soc. c. de los lagos | 2022-06 | 1.000000000 | 7.000000000 | NaN | 7.000000000 | 7.000000000 | 7.000000000 | 7.000000000 | 7.000000000 |
| 4 | soc. c. de los lagos | 2022-07 | 5.000000000 | 3.600000000 | 1.516575089 | 2.000000000 | 2.000000000 | 4.000000000 | 5.000000000 | 5.000000000 |
# Plot: median approval delta (days) per month, one panel per top provider.
# The x aesthetic names the column exactly (no trailing space) so it is looked
# up directly in `stats`.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
<ggplot: (139883934633)>
# Plot: median approval delta (days) per month and top provider, bars coloured
# by the number of records in each month.
# The x aesthetic names the column exactly (no trailing space) so it is looked
# up directly in `stats`.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='50%', fill='count'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="N° registros")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
<ggplot: (139883292495)>
# Plot: number of records per month and top provider, bars coloured by the
# median approval delta (days).
# The x aesthetic names the column exactly (no trailing space) so it is looked
# up directly in `stats`.
(
    ggplot(stats, aes(x='Fecha_de_la_transacción_year_month', y='count', fill='50%'))
    + geom_bar(stat='sum')
    + xlab('Mes')
    + ylab('N° registros')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
    + labs(fill="Tiempo (dias)")
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
<ggplot: (139882795774)>
# Boxplot: distribution of the approval delta (days) per month, one panel per
# top provider.
# The y variable is a number of days, so the axis is labelled as time (it was
# previously labelled as an amount in CLP).
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
<ggplot: (139882808339)>
# Violin plot: distribution of the approval delta (days) per month, one panel
# per top provider.
# The y variable is a number of days, so the axis is labelled as time (it was
# previously labelled as an amount in CLP).
(
    ggplot(
        df_top_categorias,
        aes(x='Fecha_de_la_transacción_year_month', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_violin(colour="#0C475B", fill="#658CB9")
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:151: PlotnineWarning: To compute the density of a group with only one value set the bandwidth manually. e.g `bw=0.1`
c:\Python310\lib\site-packages\plotnine\stats\stat_density.py:154: PlotnineWarning: Groups with fewer than 2 data points have been removed.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
<ggplot: (139877787132)>
# Median vs mean of the approval delta (days) per month and top provider.
# `stats` is reshaped to long format: one row per (month, provider, statistic).
stats_melt = pd.melt(
    stats[['Fecha_de_la_transacción_year_month', 'Proveedor ', 'mean', '50%']]
    .rename(columns={'mean': 'Promedio', '50%': 'Mediana'}),
    id_vars=['Fecha_de_la_transacción_year_month', 'Proveedor '],
    value_vars=['Promedio', 'Mediana'],
)
# The plotted values are days, so the y axis is labelled as time (it was
# previously labelled as an amount in CLP).
(
    ggplot(stats_melt, aes(x='Fecha_de_la_transacción_year_month', y='value', fill='variable'))
    + geom_bar(stat='sum', position='dodge')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Proveedor ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.9,
        aspect_ratio=0.6,
        axis_text_x=element_text(rotation=50, hjust=1),
        strip_text_x=element_text(size=8),
        strip_background=element_blank(),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
<ggplot: (139878135352)>
Tiempo en grupo monto sobre / bajo 60.000 CLP por tipo de documento
Tiempo en grupo monto sobre / bajo 60.000 CLP por Fondo fijo / Anticipo / Reembolso
Tiempo en grupo monto sobre / bajo 60.000 CLP por Sucursal
# Label outlier amounts: within each transaction month, an amount is an outlier
# when it is greater than that month's mean + 3 standard deviations.
monthly_amount = df.groupby(['Fecha_de_la_transacción_year_month'])['Cantidad ']
outliers_threshold = (
    (monthly_amount.mean() + 3 * monthly_amount.std())
    .rename('outliers_threshold')
    .reset_index()
)
# Drop a threshold column left by a previous run so the merge does not create
# suffixed '_x'/'_y' duplicates when this cell is executed again.
df = pd.merge(
    df.drop(columns=['outliers_threshold'], errors='ignore'),
    outliers_threshold,
    how='left',
    on=['Fecha_de_la_transacción_year_month'],
)
df['outlier'] = df['Cantidad '] > df['outliers_threshold']
df['outlier_label'] = 'no outlier'
# Single .loc assignment instead of chained indexing, which raised
# SettingWithCopyWarning and is not guaranteed to write into df.
df.loc[df['outlier'], 'outlier_label'] = 'outlier'
C:\Users\mwils\AppData\Local\Temp\ipykernel_26872\1116461469.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Monto en grupo de monto atípico / no atípico
# Summary statistics of the amount per month and outlier group.
amount_by_group = df.groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])['Cantidad ']
stats = amount_by_group.describe()
# Series.kurt is the reducer that matches the per-group Series passed by apply.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument; the first positional parameter is the numeric_only flag.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats
| Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-1 | no outlier | 1536.000000000 | 26471.211601562 | 43069.606540885 | 0.000000000 | 4705.000000000 | 13300.000000000 | 30404.250000000 | 405590.000000000 | 24.122147455 | 40659781.020000003 |
| 1 | 2022-1 | outlier | 20.000000000 | 916522.825000000 | 534145.878713052 | 430000.000000000 | 526700.000000000 | 674475.000000000 | 1122565.500000000 | 2176000.000000000 | 0.098491503 | 18330456.500000000 |
| 2 | 2022-2 | no outlier | 1444.000000000 | 26134.978552632 | 39306.826593109 | 0.000000000 | 5000.000000000 | 13200.000000000 | 30445.000000000 | 300000.000000000 | 18.732447239 | 37738909.030000001 |
| 3 | 2022-2 | outlier | 12.000000000 | 807491.500000000 | 607607.910171668 | 338300.000000000 | 397012.500000000 | 634504.000000000 | 955421.250000000 | 2490722.000000000 | 5.483014428 | 9689898.000000000 |
# Total amount per month, one panel per outlier group.
# NOTE(review): the x aesthetic string ends with a space, unlike the column name - confirm it resolves as intended.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188940869065)>
# Record count and total amount per month, one panel per outlier group:
# point size encodes the count, point colour encodes the summed amount.
# NOTE(review): the x aesthetic is 'count ' with a trailing space - confirm it resolves to the 'count' column.
(
    ggplot(stats, aes(x='count ', y='Fecha_de_la_transacción_year_month', color='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
    + coord_flip()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188938815528)>
# Box plot of the amount per month, coloured by outlier group (axes flipped).
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad ', color='outlier_label'))
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
<ggplot: (188935603285)>
# Zoom into the non-outlier group: box plot of the amount per month.
(
    ggplot(df[~df['outlier']], aes(x='Fecha_de_la_transacción_year_month', y='Cantidad '))
    + geom_boxplot(colour='#0C475B', fill='#658CB9')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
<ggplot: (188938429437)>
Monto en grupo de monto atípico / no atípico por Tipo de documento
# Summary statistics of the amount per month, outlier group and document type.
amount_by_group = df.groupby(
    ['Fecha_de_la_transacción_year_month', 'outlier_label', 'Tipo Documento']
)['Cantidad ']
stats = amount_by_group.describe()
# Series.kurt is the reducer that matches the per-group Series passed by apply.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument; the first positional parameter is the numeric_only flag.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats
| Fecha_de_la_transacción_year_month | outlier_label | Tipo Documento | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-1 | no outlier | CHL - Documento Exento | 1361.000000000 | 24820.552549596 | 39878.100392706 | 0.000000000 | 4950.000000000 | 12950.000000000 | 30000.000000000 | 405590.000000000 | 29.157130110 | 33780772.020000003 |
| 1 | 2022-1 | no outlier | CHL - Factura Afecta | 175.000000000 | 39308.622857143 | 61218.730443279 | 700.000000000 | 2020.000000000 | 20000.000000000 | 45568.000000000 | 357814.000000000 | 9.123795484 | 6879009.000000000 |
| 2 | 2022-1 | outlier | CHL - Documento Exento | 10.000000000 | 936689.150000000 | 613282.278861865 | 430000.000000000 | 492943.000000000 | 600950.000000000 | 1373804.500000000 | 2176000.000000000 | 0.087396082 | 9366891.500000000 |
| 3 | 2022-1 | outlier | CHL - Factura Afecta | 10.000000000 | 896356.500000000 | 474664.159614458 | 500000.000000000 | 602878.750000000 | 740725.000000000 | 932662.500000000 | 1766400.000000000 | 0.611803850 | 8963565.000000000 |
| 4 | 2022-2 | no outlier | CHL - Documento Exento | 1332.000000000 | 23343.070255255 | 33300.596306346 | 0.000000000 | 4900.000000000 | 12000.000000000 | 29975.000000000 | 261863.000000000 | 19.136446232 | 31092969.579999998 |
| 5 | 2022-2 | no outlier | CHL - Factura Afecta | 112.000000000 | 59338.745089286 | 74714.257844758 | 43.450000000 | 17404.250000000 | 30649.500000000 | 61427.250000000 | 300000.000000000 | 4.014234385 | 6645939.450000000 |
| 6 | 2022-2 | outlier | CHL - Documento Exento | 7.000000000 | 785286.142857143 | 776095.398478054 | 338300.000000000 | 363511.500000000 | 403350.000000000 | 768804.000000000 | 2490722.000000000 | 5.642201215 | 5497003.000000000 |
| 7 | 2022-2 | outlier | CHL - Factura Afecta | 5.000000000 | 838579.000000000 | 331222.351231012 | 452200.000000000 | 539000.000000000 | 904995.000000000 | 1106700.000000000 | 1190000.000000000 | -2.726125246 | 4192895.000000000 |
# Total amount per month, one free-scaled panel per (document type, outlier group).
# NOTE(review): the x aesthetic string ends with a space, unlike the column name - confirm it resolves as intended.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('monto')
    + facet_wrap(['Tipo Documento', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188936331532)>
# Record count and total amount per month, one free-scaled panel per
# (document type, outlier group): size encodes the count, colour the summed amount.
# NOTE(review): the x aesthetic is 'count ' with a trailing space - confirm it resolves to the 'count' column.
(
    ggplot(stats, aes(x='count ', y='Fecha_de_la_transacción_year_month', color='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap(['Tipo Documento', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(panel_spacing_y=0.5, panel_spacing_x=0.5)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188948857145)>
# Box plot of the amount per month, coloured by outlier group, one panel per document type.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad ', color='outlier_label'))
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Tipo Documento', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188944217113)>
# Zoom into the non-outlier group: amount per month, coloured by document type.
(
    ggplot(
        df[~df['outlier']],
        aes(x='Fecha_de_la_transacción_year_month', y='Cantidad ', color='Tipo Documento'),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
<ggplot: (188943828473)>
Monto en grupo de monto atípico / no atípico por Fondo fijo / Anticipo / Reembolso
# Summary statistics of the amount per month, outlier group and 'Fondo fijo' value.
amount_by_group = df.groupby(
    ['Fecha_de_la_transacción_year_month', 'outlier_label', 'Fondo fijo']
)['Cantidad ']
stats = amount_by_group.describe()
# Series.kurt is the reducer that matches the per-group Series passed by apply.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument; the first positional parameter is the numeric_only flag.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats
| Fecha_de_la_transacción_year_month | outlier_label | Fondo fijo | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-1 | no outlier | ANTICIPO (VxR) | 40.000000000 | 75367.100000000 | 76952.080627575 | 936.000000000 | 17025.000000000 | 48500.000000000 | 94855.500000000 | 309600.000000000 | 2.098477259 | 3014684.000000000 |
| 1 | 2022-1 | no outlier | No aplica | 10.000000000 | 51582.500000000 | 101945.839303960 | 500.000000000 | 4600.000000000 | 8285.000000000 | 28878.750000000 | 326800.000000000 | 7.337277214 | 515825.000000000 |
| 2 | 2022-1 | no outlier | REEMBOLSO | 401.000000000 | 22754.711122195 | 37136.256018548 | 500.000000000 | 4270.000000000 | 10835.000000000 | 25990.000000000 | 326652.000000000 | 28.414926755 | 9124639.160000000 |
| 3 | 2022-1 | no outlier | Si | 1085.000000000 | 25810.721529954 | 41460.207927473 | 0.000000000 | 4760.000000000 | 14000.000000000 | 30417.000000000 | 405590.000000000 | 27.872578219 | 28004632.859999999 |
| 4 | 2022-1 | outlier | ANTICIPO (VxR) | 17.000000000 | 922296.705882353 | 538442.379216635 | 430000.000000000 | 535600.000000000 | 731850.000000000 | 996946.000000000 | 2176000.000000000 | 0.416853361 | 15679044.000000000 |
| 5 | 2022-1 | outlier | REEMBOLSO | 2.000000000 | 1024148.750000000 | 812553.726377601 | 449586.500000000 | 736867.625000000 | 1024148.750000000 | 1311429.875000000 | 1598711.000000000 | NaN | 2048297.500000000 |
| 6 | 2022-1 | outlier | Si | 1.000000000 | 603115.000000000 | NaN | 603115.000000000 | 603115.000000000 | 603115.000000000 | 603115.000000000 | 603115.000000000 | NaN | 603115.000000000 |
| 7 | 2022-2 | no outlier | ANTICIPO (VxR) | 42.000000000 | 62816.238095238 | 84525.341923637 | 1100.000000000 | 10000.000000000 | 19781.500000000 | 92695.000000000 | 300000.000000000 | 1.073895928 | 2638282.000000000 |
| 8 | 2022-2 | no outlier | No aplica | 9.000000000 | 54984.222222222 | 79673.236840827 | 4900.000000000 | 17489.000000000 | 30000.000000000 | 49582.000000000 | 261863.000000000 | 7.702282351 | 494858.000000000 |
| 9 | 2022-2 | no outlier | REEMBOLSO | 382.000000000 | 20517.504136126 | 31502.584787731 | 0.000000000 | 3607.500000000 | 9195.000000000 | 24630.000000000 | 298737.000000000 | 28.765970413 | 7837686.580000000 |
| 10 | 2022-2 | no outlier | Si | 1011.000000000 | 26476.837240356 | 37647.149728328 | 43.450000000 | 5200.000000000 | 14400.000000000 | 31900.000000000 | 300000.000000000 | 19.315008368 | 26768082.449999999 |
| 11 | 2022-2 | outlier | ANTICIPO (VxR) | 7.000000000 | 818643.285714286 | 273495.373322667 | 452200.000000000 | 634504.000000000 | 807600.000000000 | 1005847.500000000 | 1190000.000000000 | -1.215672903 | 5730503.000000000 |
| 12 | 2022-2 | outlier | REEMBOLSO | 2.000000000 | 1419872.500000000 | 1514409.886160448 | 349023.000000000 | 884447.750000000 | 1419872.500000000 | 1955297.250000000 | 2490722.000000000 | NaN | 2839745.000000000 |
| 13 | 2022-2 | outlier | Si | 3.000000000 | 373216.666666667 | 32787.739070167 | 338300.000000000 | 358150.000000000 | 378000.000000000 | 390675.000000000 | 403350.000000000 | NaN | 1119650.000000000 |
# Total amount per month, one free-scaled panel per ('Fondo fijo' value, outlier group).
# NOTE(review): the x aesthetic string ends with a space, unlike the column name - confirm it resolves as intended.
(
    ggplot(stats, aes(y='sum', x='Fecha_de_la_transacción_year_month '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('monto')
    + facet_wrap(['Fondo fijo', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.2, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188944331239)>
# Record count and total amount per month, one panel per ('Fondo fijo' value, outlier group):
# size encodes the count, colour the summed amount.
# NOTE(review): the x aesthetic is 'count ' with a trailing space - confirm it resolves to the 'count' column.
(
    ggplot(stats, aes(x='count ', y='Fecha_de_la_transacción_year_month', color='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap(['Fondo fijo', 'outlier_label'], ncol=2)
    + theme_bw()
    + coord_flip()
    + theme(panel_spacing_y=0.4, panel_spacing_x=0.5, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188944158693)>
# Box plot of the amount per month, coloured by outlier group, one panel per 'Fondo fijo' value.
(
    ggplot(df, aes(x='Fecha_de_la_transacción_year_month', y='Cantidad ', color='outlier_label'))
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Fondo fijo', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188940772145)>
# Zoom into the non-outlier group: amount per month, coloured by 'Fondo fijo' value.
(
    ggplot(
        df[~df['outlier']],
        aes(x='Fecha_de_la_transacción_year_month', y='Cantidad ', color='Fondo fijo'),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
<ggplot: (188938132201)>
# Zoom into the outlier group: amount per month, coloured by 'Fondo fijo' value.
(
    ggplot(
        df[df['outlier']],
        aes(x='Fecha_de_la_transacción_year_month', y='Cantidad ', color='Fondo fijo'),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
)
<ggplot: (188944977374)>
Monto en grupo de monto atípico / no atípico por Sucursal
# Summary statistics of the amount per month, outlier group and branch (first 5 rows shown).
amount_by_group = df.groupby(
    ['Fecha_de_la_transacción_year_month', 'outlier_label', 'Sucursal ']
)['Cantidad ']
stats = amount_by_group.describe()
# Series.kurt is the reducer that matches the per-group Series passed by apply.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument; the first positional parameter is the numeric_only flag.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
| Fecha_de_la_transacción_year_month | outlier_label | Sucursal | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-1 | no outlier | GESTION INTEGRAL DE RESIDUOS SPA | 29.000000000 | 14757.379310345 | 16716.822441169 | 730.000000000 | 2200.000000000 | 7000.000000000 | 20990.000000000 | 59080.000000000 | 0.568508935 | 427964.000000000 |
| 1 | 2022-1 | no outlier | PROCESOS SANTARIOS SPA | 284.000000000 | 35686.430633803 | 58982.947434996 | 700.000000000 | 5995.000000000 | 14400.000000000 | 40040.000000000 | 405590.000000000 | 15.287269935 | 10134946.300000001 |
| 2 | 2022-1 | no outlier | VEOLIA ENERGIA CHILE | 132.000000000 | 11218.703484848 | 14702.830954940 | 11.550000000 | 1997.500000000 | 4500.000000000 | 15948.500000000 | 75000.000000000 | 3.944176011 | 1480868.860000000 |
| 3 | 2022-1 | no outlier | VEOLIA HOLDING CHILE S.A. | 91.000000000 | 32357.152307692 | 59543.712096510 | 500.000000000 | 2200.000000000 | 10580.000000000 | 35503.580000000 | 326418.000000000 | 10.240855982 | 2944500.860000000 |
| 4 | 2022-1 | no outlier | VEOLIA RESIDUOS CHILE S.A. | 16.000000000 | 39382.875000000 | 26461.022257590 | 7000.000000000 | 20448.750000000 | 35858.000000000 | 49437.750000000 | 83311.000000000 | -0.814407178 | 630126.000000000 |
# Total amount per month for the non-outlier group, one free-scaled panel per branch.
# NOTE(review): the x aesthetic string ends with a space, unlike the column name - confirm it resolves as intended.
(
    ggplot(
        stats[stats['outlier_label'] == 'no outlier'],
        aes(y='sum', x='Fecha_de_la_transacción_year_month '),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.5, aspect_ratio=0.6)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188944968539)>
# Total amount per month for rows not labelled 'no outlier', one free-scaled panel per branch.
# NOTE(review): the x aesthetic string ends with a space, unlike the column name - confirm it resolves as intended.
(
    ggplot(
        stats[stats['outlier_label'] != 'no outlier'],
        aes(y='sum', x='Fecha_de_la_transacción_year_month '),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Monto (CLP)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.5, aspect_ratio=0.6)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188933667387)>
# Record count and total amount per branch, one panel per (month, outlier group):
# size encodes the count, colour the summed amount.
# NOTE(review): the x aesthetic is 'count ' with a trailing space - confirm it resolves to the 'count' column.
(
    ggplot(stats, aes(x='count ', y='Sucursal ', color='sum'))
    + geom_point(aes(size='count'))
    + xlab('N° registros')
    + ylab('Sucursal')
    + facet_wrap(['Fecha_de_la_transacción_year_month', 'outlier_label'], ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188928731359)>
# Box plot of the amount per branch, coloured by outlier group, one panel per month (axes flipped).
# NOTE(review): the facet string ends with a space, unlike the column name - confirm it resolves as intended.
(
    ggplot(df, aes(x='Sucursal ', y='Cantidad ', color='outlier_label'))
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Monto (CLP)')
    + facet_wrap('Fecha_de_la_transacción_year_month ', ncol=2)
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=0.5,
        aspect_ratio=2,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188948789882)>
# Same box plot as the previous cell with the roles swapped:
# colour encodes the month and each free-scaled panel is one outlier group.
(
    ggplot(df, aes(x='Sucursal ', y='Cantidad ', color='Fecha_de_la_transacción_year_month'))
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Monto (CLP)')
    + facet_wrap('outlier_label', ncol=2, scales='free')
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=3,
        panel_spacing_y=0.5,
        aspect_ratio=4,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
<ggplot: (188942673659)>
# Zoom into the non-outlier group: amount per branch, coloured by month.
(
    ggplot(
        df[~df['outlier']],
        aes(x='Sucursal ', y='Cantidad ', color='Fecha_de_la_transacción_year_month'),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
    + theme(aspect_ratio=1.2)
)
<ggplot: (188942691626)>
# Zoom into the outlier group: amount per branch, coloured by month.
(
    ggplot(
        df[df['outlier']],
        aes(x='Sucursal ', y='Cantidad ', color='Fecha_de_la_transacción_year_month'),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Monto (CLP)')
    + coord_flip()
    + theme_bw()
    + theme(aspect_ratio=1.2)
)
<ggplot: (188933660594)>
Monto en grupo de monto atípico / no atípico por Categoría
# Summary statistics of the amount per category, month and outlier group (first 5 rows shown).
amount_by_group = df.groupby(
    ['Categoría ', 'Fecha_de_la_transacción_year_month', 'outlier_label']
)['Cantidad ']
stats = amount_by_group.describe()
# Series.kurt is the reducer that matches the per-group Series passed by apply.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument; the first positional parameter is the numeric_only flag.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
| Categoría | Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aceite Hidraulico | 2022-1 | no outlier | 4.000000000 | 32492.500000000 | 13071.726167573 | 13000.000000000 | 30992.500000000 | 38490.000000000 | 39990.000000000 | 39990.000000000 | 3.750498016 | 129970.000000000 |
| 1 | Aceite Hidraulico | 2022-2 | no outlier | 4.000000000 | 40337.500000000 | 59449.378115615 | 5400.000000000 | 6585.000000000 | 13490.000000000 | 47242.500000000 | 128970.000000000 | 3.742463372 | 161350.000000000 |
| 2 | Aceite Motor | 2022-1 | no outlier | 8.000000000 | 12375.000000000 | 7293.588182193 | 1500.000000000 | 10875.000000000 | 13000.000000000 | 14000.000000000 | 26000.000000000 | 1.437663240 | 99000.000000000 |
| 3 | Aceite Motor | 2022-2 | no outlier | 3.000000000 | 21233.333333333 | 12568.346483660 | 13000.000000000 | 14000.000000000 | 15000.000000000 | 25350.000000000 | 35700.000000000 | NaN | 63700.000000000 |
| 4 | Alojamientos | 2022-1 | no outlier | 3.000000000 | 94333.333333333 | 51549.329125929 | 39000.000000000 | 71000.000000000 | 103000.000000000 | 122000.000000000 | 141000.000000000 | NaN | 283000.000000000 |
# Top categories by total amount per month and outlier group (first 5 rows shown).
stats_top = (
    stats.groupby('Fecha_de_la_transacción_year_month')
    # Within each month, order the rows by total amount, largest first.
    .apply(lambda month_rows: month_rows.sort_values(['sum'], ascending=False))
    # Replace the index produced by groupby/apply with a running position named 'Index'.
    .assign(Index=range(len(stats)))
    .set_index('Index')
    # Keep the first 10 rows of every (month, outlier group) pair.
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
stats_top.head(5)
| Categoría | Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Index | |||||||||||||
| 0 | Manutencion - Colacion | 2022-1 | no outlier | 260.000000000 | 26383.076038462 | 51245.734643259 | 886.600000000 | 5075.000000000 | 10545.000000000 | 20462.250000000 | 405590.000000000 | 21.749169761 | 6859599.770000000 |
| 1 | Peajes | 2022-1 | no outlier | 197.000000000 | 26694.974619289 | 40039.965832933 | 200.000000000 | 1600.000000000 | 14220.000000000 | 38600.000000000 | 286330.000000000 | 19.876583769 | 5258910.000000000 |
| 2 | Peajes | 2022-1 | outlier | 4.000000000 | 1235600.000000000 | 865701.164759911 | 500000.000000000 | 500000.000000000 | 1133200.000000000 | 1868800.000000000 | 2176000.000000000 | -4.912002825 | 4942400.000000000 |
| 3 | Impto veh. - P.circulacion - Rev. Tecnica - SOAP | 2022-1 | no outlier | 82.000000000 | 46858.512195122 | 68753.314855258 | 936.000000000 | 11451.250000000 | 18445.000000000 | 40219.250000000 | 326800.000000000 | 6.272290556 | 3842398.000000000 |
| 4 | Pasajes Aereos y terrestres al int (en Chile) | 2022-1 | outlier | 3.000000000 | 1230553.500000000 | 687183.973245702 | 449586.500000000 | 974505.250000000 | 1499424.000000000 | 1621037.000000000 | 1742650.000000000 | NaN | 3691660.500000000 |
# Top categories by total amount, one free-scaled panel per (month, outlier group).
# Each bar is labelled "<category> - <month> - <outlier group>".
bar_labels = (
    stats_top['Categoría ']
    + ' - '
    + stats_top['Fecha_de_la_transacción_year_month']
    + ' - '
    + stats_top['outlier_label']
)
# The category order is the reverse of the row order in stats_top.
categoria_list = bar_labels.tolist()[::-1]
categoria_cat = pd.Categorical(bar_labels, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
(
    ggplot(stats_top_aux, aes(y='sum', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(['Fecha_de_la_transacción_year_month', 'outlier_label'], scales='free', ncol=1)
    + coord_flip()
    + theme_bw()
    + theme(legend_position='none')
    + theme(panel_spacing_y=1, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188940947787)>
# Find the categories that appear in the monthly top 10 in every month, per outlier group.
stats_top = (
    stats.groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda month_rows: month_rows.sort_values(['sum'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())
# Number of top-10 rows for each (category, outlier group) pair (computed once;
# the original repeated this statement twice).
categoria_count = (
    stats_top.groupby(['Categoría ', 'outlier_label'])['Categoría ']
    .count()
    .sort_values(ascending=False)
)
# Keep only the pairs whose count equals the number of months, as a list of index entries.
categoria_count = categoria_count[categoria_count == n_month].index.to_list()
# NOTE: df_aux is the same object as df (no copy), so the helper column below is added to df too.
df_aux = df
df_aux['list_of_tuples'] = list(
    zip(df_aux['Categoría '].to_list(), df_aux['outlier_label'].to_list())
)
df_top_categorias = df_aux[df_aux['list_of_tuples'].isin(categoria_count)]
# Recurring top categories, non-outlier group: summed amount per category,
# one panel per (outlier group, month).
(
    ggplot(
        df_top_categorias[df_top_categorias['outlier_label'] == 'no outlier'],
        aes(y='Cantidad ', x='Categoría '),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(['outlier_label', 'Fecha_de_la_transacción_year_month'], ncol=1)
    + coord_flip()
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188942673713)>
# Recurring top categories, rows not labelled 'no outlier': summed amount per category,
# one panel per (outlier group, month).
(
    ggplot(
        df_top_categorias[df_top_categorias['outlier_label'] != 'no outlier'],
        aes(y='Cantidad ', x='Categoría '),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(['outlier_label', 'Fecha_de_la_transacción_year_month'], ncol=1)
    + coord_flip()
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188980256513)>
Monto en grupo de monto atípico / no atípico por Proveedor
# Summary statistics of the amount per supplier, month and outlier group (first 5 rows shown).
amount_by_group = df.groupby(
    ['Proveedor ', 'Fecha_de_la_transacción_year_month', 'outlier_label']
)['Cantidad ']
stats = amount_by_group.describe()
# Series.kurt is the reducer that matches the per-group Series passed by apply.
stats['kurt'] = amount_by_group.apply(pd.Series.kurt)
# sum() takes no column argument; the first positional parameter is the numeric_only flag.
stats['sum'] = amount_by_group.sum()
stats = stats.reset_index()
stats.head(5)
| Proveedor | Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2022-1 | no outlier | 4.000000000 | 19945.000000000 | 15184.458062989 | 0.000000000 | 13350.000000000 | 22015.000000000 | 28610.000000000 | 35750.000000000 | 0.434133850 | 79780.000000000 |
| 1 | 12636468-7 | 2022-2 | no outlier | 1.000000000 | 9000.000000000 | NaN | 9000.000000000 | 9000.000000000 | 9000.000000000 | 9000.000000000 | 9000.000000000 | NaN | 9000.000000000 |
| 2 | 14324088-6 | 2022-2 | no outlier | 1.000000000 | 13200.000000000 | NaN | 13200.000000000 | 13200.000000000 | 13200.000000000 | 13200.000000000 | 13200.000000000 | NaN | 13200.000000000 |
| 3 | 24100 | 2022-1 | no outlier | 1.000000000 | 24100.000000000 | NaN | 24100.000000000 | 24100.000000000 | 24100.000000000 | 24100.000000000 | 24100.000000000 | NaN | 24100.000000000 |
| 4 | 2DO JUZGADO POLICIA LOCAL ( ABONAR A CLAUDIO O... | 2022-1 | no outlier | 1.000000000 | 163326.000000000 | NaN | 163326.000000000 | 163326.000000000 | 163326.000000000 | 163326.000000000 | 163326.000000000 | NaN | 163326.000000000 |
# Top suppliers by total amount per month and outlier group (first 5 rows shown).
stats_top = (
    stats.groupby('Fecha_de_la_transacción_year_month')
    # Within each month, order the rows by total amount, largest first.
    .apply(lambda month_rows: month_rows.sort_values(['sum'], ascending=False))
    # Replace the index produced by groupby/apply with a running position named 'Index'.
    .assign(Index=range(len(stats)))
    .set_index('Index')
    # Keep the first 10 rows of every (month, outlier group) pair.
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
stats_top.head(5)
| Proveedor | Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | kurt | sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Index | |||||||||||||
| 0 | RUTA DE LA ARAUCANIA SOCIEDAD CONCESIONARIA S.A | 2022-1 | outlier | 4.000000000 | 842300.000000000 | 617969.697207449 | 500000.000000000 | 500000.000000000 | 551400.000000000 | 893700.000000000 | 1766400.000000000 | 3.870873076 | 3369200.000000000 |
| 1 | Ruta de la Araucanía | 2022-1 | outlier | 2.000000000 | 1355800.000000000 | 1159937.963858413 | 535600.000000000 | 945700.000000000 | 1355800.000000000 | 1765900.000000000 | 2176000.000000000 | NaN | 2711600.000000000 |
| 2 | BYF CORREDORES DE SEGUROS LTDA | 2022-1 | no outlier | 20.000000000 | 111590.000000000 | 92384.772611758 | 16500.000000000 | 42650.000000000 | 86000.000000000 | 144300.000000000 | 326800.000000000 | 0.703073558 | 2231800.000000000 |
| 3 | PEAJES | 2022-1 | no outlier | 14.000000000 | 127977.142857143 | 97321.271643386 | 8500.000000000 | 33175.000000000 | 131870.000000000 | 191932.500000000 | 286330.000000000 | -1.156886909 | 1791680.000000000 |
| 4 | soc. y turismo cascada de las Animas | 2022-1 | outlier | 1.000000000 | 1742650.000000000 | NaN | 1742650.000000000 | 1742650.000000000 | 1742650.000000000 | 1742650.000000000 | 1742650.000000000 | NaN | 1742650.000000000 |
# Bar chart of the top providers by total amount, one panel per (month, outlier group).
# Each bar is labelled "<provider> - <month> - <outlier label>".
categoria_cat = (
    stats_top['Proveedor ']
    + ' - '
    + stats_top['Fecha_de_la_transacción_year_month']
    + ' - '
    + stats_top['outlier_label']
)
# The reversed row order is used as the category order of the label axis.
categoria_list = categoria_cat.tolist()[::-1]
categoria_cat = pd.Categorical(categoria_cat, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
(
    ggplot(stats_top_aux, aes(y='sum', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(
        ['Fecha_de_la_transacción_year_month', 'outlier_label'],
        scales='free',
        ncol=1,
    )
    + coord_flip()
    + theme_bw()
    + theme(legend_position='none')
    + theme(panel_spacing_y=1, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188944323104)>
# Providers that appear in the monthly top 10 of their outlier group in every month.
stats_top = (
    stats
    .groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda month_rows: month_rows.sort_values(['sum'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())
# Number of monthly top-10 appearances per (provider, outlier group).
categoria_count = (
    stats_top
    .groupby(['Proveedor ', 'outlier_label'])['Proveedor ']
    .count()
    .sort_values(ascending=False)
)
# Keep only the pairs whose appearance count equals the number of months.
categoria_count = categoria_count[categoria_count == n_month].index.to_list()
# Work on a copy so the helper column is not written into the shared `df`.
df_aux = df.copy()
df_aux['list_of_tuples'] = list(zip(df_aux['Proveedor '], df_aux['outlier_label']))
df_top_categorias = df_aux[df_aux['list_of_tuples'].isin(categoria_count)]
# Recurring top providers, non-outlier amount group: amount bars per provider,
# one panel per (outlier label, month).
# NOTE(review): stat='sum' appears to collapse rows with identical (x, y) before
# the bars are stacked, so repeated amounts may be counted once — confirm.
(
    ggplot(
        df_top_categorias[df_top_categorias['outlier_label'] == 'no outlier'],
        aes(y='Cantidad ', x='Proveedor '),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(['outlier_label', 'Fecha_de_la_transacción_year_month'], ncol=1)
    + coord_flip()
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188943625862)>
# Recurring top providers, outlier amount group (every label other than 'no outlier'):
# amount bars per provider, one panel per (outlier label, month).
# NOTE(review): stat='sum' appears to collapse rows with identical (x, y) before
# the bars are stacked, so repeated amounts may be counted once — confirm.
(
    ggplot(
        df_top_categorias[df_top_categorias['outlier_label'] != 'no outlier'],
        aes(y='Cantidad ', x='Proveedor '),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Monto')
    + facet_wrap(['outlier_label', 'Fecha_de_la_transacción_year_month'], ncol=1)
    + coord_flip()
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188940948123)>
Tiempo total en grupo monto atipico / no atipico
# Descriptive statistics of the approval delay (approval date minus transaction date)
# per month and outlier / non-outlier amount group.
stats = (
    df
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats
| Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-1 | no outlier | 1535 | 18 days 14:45:18.600895113 | 24 days 06:07:30.089167873 | 0 days 12:50:17.203000 | 3 days 22:16:23.328500 | 9 days 14:26:55.083000 | 22 days 13:58:07.938500 | 228 days 21:13:48.960000 |
| 1 | 2022-1 | outlier | 20 | 39 days 10:29:04.452300 | 49 days 04:05:52.966655539 | 0 days 19:58:53.307000 | 4 days 12:57:21.412750 | 21 days 16:14:05.965000 | 49 days 21:22:57.055000 | 184 days 02:40:04.157000 |
| 2 | 2022-2 | no outlier | 1443 | 19 days 07:33:21.484621621 | 28 days 08:36:53.664947149 | 0 days 14:02:14.193000 | 3 days 14:56:39.558500 | 7 days 16:11:19.140000 | 21 days 13:23:24.600000 | 203 days 15:40:56.447000 |
| 3 | 2022-2 | outlier | 12 | 17 days 10:26:07.342083333 | 29 days 21:01:01.697772761 | 0 days 21:29:30.367000 | 1 days 09:45:56.209500 | 2 days 13:43:37.801500 | 15 days 17:23:47.647750 | 83 days 17:37:37.997000 |
# Median approval delay in whole days per month, one panel per outlier group.
# 'Dias' is the median ('50%') and 'N° Registros' the row count from describe().
stats = (
    df
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
    .rename(columns={'50%': 'Dias', 'count': 'N° Registros'})
)
# The x aesthetic uses the exact column name; the previous value carried a
# trailing space and did not match any column of `stats` literally.
(
    ggplot(stats, aes(y='Dias', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188933928702)>
# Record volume vs. month per outlier group: point size encodes the record
# count and colour encodes the median delay in days.
(
    ggplot(
        stats,
        aes(x='N° Registros', y='Fecha_de_la_transacción_year_month', color='Dias'),
    )
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
    + coord_flip()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188934253088)>
# Box plot of the approval delay (days) per month, coloured by outlier group.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + coord_flip()
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
<ggplot: (188922080833)>
Tiempo en grupo monto atipico / no atipico por tipo de documento
# Descriptive statistics of the approval delay per month, outlier group and document type.
stats = (
    df
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Tipo Documento'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats
| Fecha_de_la_transacción_year_month | outlier_label | Tipo Documento | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-1 | no outlier | CHL - Documento Exento | 1360 | 18 days 21:12:07.643848529 | 24 days 03:30:24.238639703 | 0 days 12:50:17.203000 | 3 days 21:38:23.359250 | 9 days 15:20:22.593000 | 22 days 20:58:25.916750 | 208 days 15:40:57.493000 |
| 1 | 2022-1 | no outlier | CHL - Factura Afecta | 175 | 16 days 12:39:11.181371428 | 25 days 01:26:01.807138667 | 0 days 14:57:56.833000 | 4 days 13:19:33.862000 | 6 days 22:03:23.927000 | 18 days 05:42:46.096500 | 228 days 21:13:48.960000 |
| 2 | 2022-1 | outlier | CHL - Documento Exento | 10 | 39 days 08:50:46.774700 | 62 days 00:33:02.268031093 | 0 days 19:58:53.307000 | 3 days 16:37:49.032250 | 4 days 07:46:57.368500 | 68 days 07:18:13.114250 | 184 days 02:40:04.157000 |
| 3 | 2022-1 | outlier | CHL - Factura Afecta | 10 | 39 days 12:07:22.129900 | 35 days 11:00:20.469659058 | 5 days 12:34:56.157000 | 21 days 15:49:31.206000 | 30 days 03:38:58.106500 | 36 days 15:00:08.862500 | 103 days 17:37:37.903000 |
| 4 | 2022-2 | no outlier | CHL - Documento Exento | 1331 | 19 days 18:59:05.559374154 | 29 days 02:31:48.883249929 | 0 days 14:02:14.193000 | 3 days 15:19:23.866500 | 7 days 18:13:58.177000 | 21 days 14:38:35.122000 | 203 days 15:40:56.447000 |
| 5 | 2022-2 | no outlier | CHL - Factura Afecta | 112 | 13 days 15:44:08.239125 | 16 days 04:49:13.403850101 | 0 days 16:47:08.880000 | 3 days 14:19:27.212250 | 7 days 11:56:44.890000 | 17 days 21:29:53.300500 | 85 days 15:54:25.647000 |
| 6 | 2022-2 | outlier | CHL - Documento Exento | 7 | 6 days 08:06:46.442571428 | 6 days 18:59:20.584001436 | 0 days 21:29:30.367000 | 1 days 14:22:32.282000 | 2 days 13:43:53.323000 | 10 days 13:34:54.697000 | 16 days 13:39:07.450000 |
| 7 | 2022-2 | outlier | CHL - Factura Afecta | 5 | 32 days 23:17:12.601400 | 43 days 05:20:13.146994361 | 0 days 21:40:40.883000 | 0 days 21:46:43.857000 | 2 days 13:43:22.280000 | 76 days 17:37:37.990000 | 83 days 17:37:37.997000 |
# Median approval delay in whole days per month, one panel per
# (document type, outlier group).
stats = (
    df
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Tipo Documento'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
    .rename(columns={'50%': 'Dias', 'count': 'N° Registros'})
)
# The x aesthetic uses the exact column name; the previous value carried a
# trailing space and did not match any column of `stats` literally.
(
    ggplot(stats, aes(y='Dias', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap(['Tipo Documento', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188934253094)>
# Record volume vs. month per (document type, outlier group): point size encodes
# the record count and colour encodes the median delay in days.
(
    ggplot(
        stats,
        aes(x='N° Registros', y='Fecha_de_la_transacción_year_month', color='Dias'),
    )
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap(['Tipo Documento', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(panel_spacing_y=0.5, panel_spacing_x=0.5)
)
# Box plot of the approval delay (days) per month, coloured by outlier group,
# one panel per document type.
# The y axis shows the delay in days, so it is labelled as time rather than as
# an amount in CLP.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Tipo Documento', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
<ggplot: (188938977944)>
Tiempo en grupo monto atipico / no atipico por Fondo fijo / Anticipo / Reembolso
# Descriptive statistics of the approval delay per month, outlier group and 'Fondo fijo' value.
stats = (
    df
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Fondo fijo'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats
| Fecha_de_la_transacción_year_month | outlier_label | Fondo fijo | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-1 | no outlier | ANTICIPO (VxR) | 40 | 47 days 22:36:30.679100 | 44 days 07:42:45.186159767 | 2 days 21:28:29.130000 | 3 days 21:26:10.033500 | 39 days 17:53:19.668500 | 86 days 18:30:56.782500 | 112 days 18:31:03.363000 |
| 1 | 2022-1 | no outlier | No aplica | 10 | 15 days 03:04:33.145400 | 10 days 02:33:36.277393134 | 3 days 21:26:09.297000 | 8 days 20:56:05.725250 | 14 days 02:56:05.743500 | 17 days 20:38:16.848000 | 39 days 14:14:10.410000 |
| 2 | 2022-1 | no outlier | REEMBOLSO | 400 | 18 days 03:38:43.134352499 | 23 days 20:34:21.485607245 | 0 days 13:56:17.123000 | 4 days 21:52:34.728250 | 10 days 13:36:29.502000 | 21 days 14:58:23.653000 | 228 days 21:13:48.960000 |
| 3 | 2022-1 | no outlier | Si | 1085 | 17 days 17:40:25.290336405 | 22 days 18:39:45.369740785 | 0 days 12:50:17.203000 | 3 days 17:48:35.467000 | 9 days 01:49:21.473000 | 22 days 14:07:33.910000 | 208 days 15:40:57.493000 |
| 4 | 2022-1 | outlier | ANTICIPO (VxR) | 17 | 29 days 17:43:10.038411764 | 35 days 00:01:26.696718079 | 0 days 19:58:53.307000 | 3 days 21:26:09.280000 | 21 days 15:24:56.447000 | 33 days 16:59:30.680000 | 103 days 17:37:37.903000 |
| 5 | 2022-1 | outlier | REEMBOLSO | 2 | 138 days 01:05:04.735000 | 65 days 03:31:50.582185716 | 91 days 23:30:05.313000 | 115 days 00:17:35.024000 | 138 days 01:05:04.735000 | 161 days 01:52:34.446000 | 184 days 02:40:04.157000 |
| 6 | 2022-1 | outlier | Si | 1 | 7 days 02:17:28.923000 | NaT | 7 days 02:17:28.923000 | 7 days 02:17:28.923000 | 7 days 02:17:28.923000 | 7 days 02:17:28.923000 | 7 days 02:17:28.923000 |
| 7 | 2022-2 | no outlier | ANTICIPO (VxR) | 41 | 49 days 00:58:11.847780488 | 28 days 12:20:11.070784460 | 0 days 16:21:41.257000 | 10 days 23:42:58.323000 | 61 days 18:31:03.363000 | 68 days 19:59:26.777000 | 78 days 18:30:56.837000 |
| 8 | 2022-2 | no outlier | No aplica | 9 | 16 days 12:50:18.923666666 | 18 days 13:09:45.994352181 | 1 days 02:22:16.203000 | 3 days 13:47:43.693000 | 6 days 02:22:16.177000 | 28 days 17:07:06.090000 | 49 days 14:39:48.870000 |
| 9 | 2022-2 | no outlier | REEMBOLSO | 382 | 20 days 13:56:54.576479057 | 27 days 15:55:26.582841934 | 0 days 14:02:14.193000 | 4 days 14:23:59.762000 | 9 days 20:08:38.930000 | 21 days 13:47:43.579750 | 161 days 22:22:53.340000 |
| 10 | 2022-2 | no outlier | Si | 1011 | 17 days 15:44:04.350170128 | 28 days 00:33:42.677869793 | 0 days 14:22:45.003000 | 3 days 14:19:27.180000 | 6 days 21:11:25.850000 | 18 days 15:26:15.522000 | 203 days 15:40:56.447000 |
| 11 | 2022-2 | outlier | ANTICIPO (VxR) | 7 | 24 days 17:31:34.953857142 | 38 days 00:05:34.076866379 | 0 days 21:40:40.883000 | 1 days 17:45:03.068500 | 2 days 13:43:53.323000 | 41 days 05:04:23.168500 | 83 days 17:37:37.997000 |
| 12 | 2022-2 | outlier | REEMBOLSO | 2 | 1 days 14:22:32.282000 | 0 days 00:52:08.176760358 | 1 days 13:45:40.327000 | 1 days 14:04:06.304500 | 1 days 14:22:32.282000 | 1 days 14:40:58.259500 | 1 days 14:59:24.237000 |
| 13 | 2022-2 | outlier | Si | 3 | 10 days 23:15:46.288000 | 8 days 17:48:53.652270656 | 0 days 21:29:30.367000 | 8 days 04:04:05.707000 | 15 days 10:38:41.047000 | 16 days 00:08:54.248500 | 16 days 13:39:07.450000 |
# Median approval delay in whole days per month, one panel per
# ('Fondo fijo' value, outlier group).
stats = (
    df
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Fondo fijo'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
    .rename(columns={'50%': 'Dias', 'count': 'N° Registros'})
)
# The x aesthetic uses the exact column name; the previous value carried a
# trailing space and did not match any column of `stats` literally.
(
    ggplot(stats, aes(y='Dias', x='Fecha_de_la_transacción_year_month'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap(['Fondo fijo', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.2, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188939122704)>
# Record volume vs. month per ('Fondo fijo' value, outlier group): point size
# encodes the record count and colour encodes the median delay in days.
# A single theme() call carries the settings that were effective before: the
# earlier panel_spacing_y=0.4 was overridden by a second theme() call setting 0.5.
(
    ggplot(
        stats,
        aes(x='N° Registros', y='Fecha_de_la_transacción_year_month', color='Dias'),
    )
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Mes')
    + facet_wrap(['Fondo fijo', 'outlier_label'], ncol=2, scales='free')
    + theme_bw()
    + coord_flip()
    + theme(panel_spacing_y=0.5, panel_spacing_x=0.5, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188925262284)>
# Box plot of the approval delay (days) per month, coloured by outlier group,
# one panel per 'Fondo fijo' value.
(
    ggplot(
        df,
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo(dias)')
    + facet_wrap('Fondo fijo', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
<ggplot: (188943491181)>
# Zoom on rows where df['outlier'] is False: approval delay (days) per month,
# coloured by 'Fondo fijo' value.
(
    ggplot(
        df[df['outlier'] == False],
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fondo fijo',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + coord_flip()
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
<ggplot: (188941331472)>
# Zoom on rows where df['outlier'] is True: approval delay (days) per month,
# coloured by 'Fondo fijo' value.
(
    ggplot(
        df[df['outlier'] == True],
        aes(
            x='Fecha_de_la_transacción_year_month',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fondo fijo',
        ),
    )
    + geom_boxplot()
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + coord_flip()
    + theme_bw()
)
<ggplot: (188946369726)>
Tiempo en grupo monto atipico / no atipico por Sucursal
# Descriptive statistics of the approval delay per month, outlier group and branch
# (first 5 rows shown).
stats = (
    df
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Sucursal '])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
| Fecha_de_la_transacción_year_month | outlier_label | Sucursal | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-1 | no outlier | GESTION INTEGRAL DE RESIDUOS SPA | 28 | 57 days 23:38:26.290428571 | 13 days 17:45:14.510641479 | 43 days 12:46:05.287000 | 50 days 18:54:36.242500 | 57 days 12:46:03.315000 | 59 days 19:45:46.077000 | 117 days 13:50:32.827000 |
| 1 | 2022-1 | no outlier | PROCESOS SANTARIOS SPA | 284 | 13 days 20:41:27.787309859 | 16 days 03:41:14.548368038 | 0 days 12:50:17.203000 | 4 days 21:52:34.927500 | 8 days 22:09:28.185000 | 16 days 00:39:24.182250 | 114 days 15:23:51.650000 |
| 2 | 2022-1 | no outlier | VEOLIA ENERGIA CHILE | 132 | 28 days 04:14:15.729045454 | 31 days 23:11:17.629796112 | 1 days 00:03:08.623000 | 5 days 16:30:34.804000 | 14 days 08:04:03.280000 | 37 days 11:52:28.393250 | 118 days 03:55:29.213000 |
| 3 | 2022-1 | no outlier | VEOLIA HOLDING CHILE S.A. | 91 | 24 days 11:18:41.364175824 | 22 days 12:12:41.932613080 | 0 days 21:07:27.457000 | 10 days 13:36:29.502000 | 16 days 15:20:22.343000 | 27 days 15:20:22.998500 | 106 days 17:54:42.327000 |
| 4 | 2022-1 | no outlier | VEOLIA RESIDUOS CHILE S.A. | 16 | 41 days 05:28:25.235062500 | 57 days 21:05:09.055380919 | 0 days 14:57:56.833000 | 8 days 12:17:07.009750 | 26 days 03:15:52.876500 | 36 days 03:05:30.682250 | 228 days 21:13:48.960000 |
# Median approval delay in whole days per month for the 'no outlier' group,
# one panel per branch.
stats = (
    df
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label', 'Sucursal '])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
    .rename(columns={'50%': 'Dias', 'count': 'N° Registros'})
)
# The x aesthetic uses the exact column name; the previous value carried a
# trailing space and did not match any column of `stats` literally.
(
    ggplot(
        stats[stats['outlier_label'] == 'no outlier'],
        aes(y='Dias', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.5, aspect_ratio=0.6)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188938590929)>
# Median approval delay in whole days per month for the outlier group
# (every label other than 'no outlier'), one panel per branch.
# The x aesthetic uses the exact column name; the previous value carried a
# trailing space and did not match any column of `stats` literally.
(
    ggplot(
        stats[stats['outlier_label'] != 'no outlier'],
        aes(y='Dias', x='Fecha_de_la_transacción_year_month'),
    )
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('Mes')
    + ylab('Tiempo (dias)')
    + facet_wrap('Sucursal ', ncol=2, scales='free')
    + theme_bw()
    + guides(size=False)
    + theme(panel_spacing_x=1, panel_spacing_y=0.5, aspect_ratio=0.6)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188941366116)>
# Record volume per branch, one panel per (month, outlier group): point size
# encodes the record count and colour encodes the median delay in days.
(
    ggplot(stats, aes(x='N° Registros', y='Sucursal ', color='Dias'))
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Sucursal')
    + facet_wrap(['Fecha_de_la_transacción_year_month', 'outlier_label'], ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188945653944)>
# Box plot of the approval delay (days) per branch, coloured by outlier group,
# one panel per month.
# facet_wrap uses the exact month column name; the previous value carried a
# trailing space and did not match any column of `df` literally.
(
    ggplot(
        df,
        aes(
            x='Sucursal ',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='outlier_label',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Tiempo (dias)')
    + facet_wrap('Fecha_de_la_transacción_year_month', ncol=2)
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=.5,
        aspect_ratio=2,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
<ggplot: (188948863208)>
# Box plot of the approval delay (days) per branch; same data as the per-month
# version but with the roles swapped: colour = month, panel = outlier group.
(
    ggplot(
        df,
        aes(
            x='Sucursal ',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fecha_de_la_transacción_year_month',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=2, scales='free')
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=3,
        panel_spacing_y=.5,
        aspect_ratio=4,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:390: PlotnineWarning: If you need more space for the x-axis tick text use ... + theme(subplots_adjust={'wspace': 0.25}). Choose an appropriate value for 'wspace'.
<ggplot: (188938591647)>
# Zoom on rows where df['outlier'] is False: approval delay (days) per branch,
# coloured by month.
(
    ggplot(
        df[df['outlier'] == False],
        aes(
            x='Sucursal ',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fecha_de_la_transacción_year_month',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Tiempo (Dias)')
    + coord_flip()
    + theme_bw()
    + theme(aspect_ratio=1.2)
)
c:\Python310\lib\site-packages\plotnine\layer.py:324: PlotnineWarning: stat_boxplot : Removed 2 rows containing non-finite values.
<ggplot: (188946066856)>
# Zoom on rows where df['outlier'] is True: approval delay (days) per branch,
# coloured by month.
(
    ggplot(
        df[df['outlier'] == True],
        aes(
            x='Sucursal ',
            y='delta_Fecha_aprobación_transacción_day_int',
            color='Fecha_de_la_transacción_year_month',
        ),
    )
    + geom_boxplot()
    + xlab('Sucursal')
    + ylab('Tiempo (Dias)')
    + coord_flip()
    + theme_bw()
    + theme(aspect_ratio=1.2)
)
<ggplot: (188980201518)>
Tiempo en grupo monto atipico / no atipico por Categoria
# Descriptive statistics of the approval delay per category, month and outlier group
# (first 5 rows shown).
stats = (
    df
    .groupby(['Categoría ', 'Fecha_de_la_transacción_year_month', 'outlier_label'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
| Categoría | Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aceite Hidraulico | 2022-1 | no outlier | 4 | 3 days 09:59:02.405000 | 1 days 09:38:23.331365018 | 1 days 21:24:30.983000 | 2 days 09:25:18.118250 | 3 days 13:26:20.508500 | 4 days 14:00:04.795250 | 4 days 15:38:57.620000 |
| 1 | Aceite Hidraulico | 2022-2 | no outlier | 4 | 2 days 00:00:15.409250 | 1 days 22:58:42.884627810 | 0 days 16:34:35.127000 | 0 days 19:51:23.124000 | 1 days 05:07:30.376500 | 2 days 09:16:22.661750 | 4 days 21:11:25.757000 |
| 2 | Aceite Motor | 2022-1 | no outlier | 8 | 4 days 00:37:36.423125 | 1 days 20:11:34.060700196 | 0 days 21:59:31.997000 | 3 days 02:29:40.134000 | 4 days 13:39:26.703500 | 5 days 13:55:16.872000 | 5 days 16:30:34.777000 |
| 3 | Aceite Motor | 2022-2 | no outlier | 3 | 4 days 11:12:35.054666666 | 4 days 00:18:16.952125675 | 0 days 15:38:27.007000 | 2 days 08:59:44.133500 | 4 days 02:21:01.260000 | 6 days 08:59:39.078500 | 8 days 15:38:16.897000 |
| 4 | Alojamientos | 2022-1 | no outlier | 3 | 12 days 14:10:01.822000 | 8 days 17:01:53.336304872 | 6 days 15:37:40.023000 | 7 days 14:11:15.778000 | 8 days 12:44:51.533000 | 15 days 13:26:12.721500 | 22 days 14:07:33.910000 |
# Categories with the highest median approval delay (whole days) per month and
# outlier group (first 5 rows shown).
stats = (
    df
    .groupby(['Categoría ', 'Fecha_de_la_transacción_year_month', 'outlier_label'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
# Rows are sorted by the median ('50%', descending) inside each month, renumbered
# with a running 'Index', and the first 10 rows of every (month, outlier_label)
# group are kept.
stats_top = (
    stats
    .groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda month_rows: month_rows.sort_values(['50%'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
stats_top.head(5)
| Categoría | Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Index | |||||||||||
| 0 | Repuestos y mantto equipos - Veh. pesados | 2022-1 | outlier | 3.000000000 | 70.333333333 | 56.580326381 | 5.000000000 | 54.000000000 | 103.000000000 | 103.000000000 | 103.000000000 |
| 1 | Resto de coste indirecto - Multas | 2022-1 | outlier | 1.000000000 | 91.000000000 | NaN | 91.000000000 | 91.000000000 | 91.000000000 | 91.000000000 | 91.000000000 |
| 2 | Repuestos equipos y maquinaria | 2022-1 | no outlier | 1.000000000 | 63.000000000 | NaN | 63.000000000 | 63.000000000 | 63.000000000 | 63.000000000 | 63.000000000 |
| 3 | Materiales seguridad (Lentes, mascara, chaleco... | 2022-1 | no outlier | 2.000000000 | 48.500000000 | 60.104076401 | 6.000000000 | 27.250000000 | 48.500000000 | 69.750000000 | 91.000000000 |
| 4 | Gastos de formacion y capacitaciones | 2022-1 | no outlier | 2.000000000 | 45.500000000 | 20.506096654 | 31.000000000 | 38.250000000 | 45.500000000 | 52.750000000 | 60.000000000 |
# Bar chart of the top categories by median approval delay, one panel per
# (month, outlier group). Each bar is labelled "<category> - <month> - <outlier label>".
categoria_cat = (
    stats_top['Categoría ']
    + ' - '
    + stats_top['Fecha_de_la_transacción_year_month']
    + ' - '
    + stats_top['outlier_label']
)
# The reversed row order is used as the category order of the label axis.
categoria_list = categoria_cat.tolist()[::-1]
categoria_cat = pd.Categorical(categoria_cat, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
(
    ggplot(stats_top_aux, aes(y='50%', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap(
        ['Fecha_de_la_transacción_year_month', 'outlier_label'],
        scales='free',
        ncol=1,
    )
    + coord_flip()
    + theme_bw()
    + theme(legend_position='none')
    + theme(panel_spacing_y=1, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188949446479)>
# Categories that appear in the monthly top 10 (by median approval delay) of
# their outlier group in every month.
stats_top = (
    stats
    .groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda month_rows: month_rows.sort_values(['50%'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())
# Number of monthly top-10 appearances per (category, outlier group).
categoria_count = (
    stats_top
    .groupby(['Categoría ', 'outlier_label'])['Categoría ']
    .count()
    .sort_values(ascending=False)
)
# Keep only the pairs whose appearance count equals the number of months.
categoria_count = categoria_count[categoria_count == n_month].index.to_list()
# Work on a copy so the helper column is not written into the shared `df`.
df_aux = df.copy()
df_aux['list_of_tuples'] = list(zip(df_aux['Categoría '], df_aux['outlier_label']))
df_top_categorias = df_aux[df_aux['list_of_tuples'].isin(categoria_count)]
# Categories that appear month after month, per group above / below the limit:
# descriptive statistics of the approval delay (in days) for each
# (category, outlier_label) pair, flattened back into regular columns.
stats = (
    df_top_categorias
    .groupby(['Categoría ', 'outlier_label'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
# Horizontal bars of the median ('50%'), one facet per outlier_label.
(
    ggplot(stats, aes(y='50%', x='Categoría '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188949391775)>
# Record volume and median approval delay for the categories that appear month
# after month, per group above / below the limit.
# Rename the describe() columns that are plotted to their display names.
nombres_columnas = {'50%': 'Dias', 'count': 'N° Registros'}
stats = stats.rename(columns=nombres_columnas)
# Point size follows the record count; colour follows the median delay.
(
    ggplot(stats, aes(x='N° Registros', y='Categoría ', color='Dias'))
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Categoria')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188943647060)>
# Boxplot of the records belonging to the categories that appear in the top
# month after month, per group above / below the limit.
(
    ggplot(
        df_top_categorias,
        aes(x='Categoría ', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Categoria')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=2)
    + coord_flip()
    + theme_bw()
    + theme(
        panel_spacing_x=1,
        panel_spacing_y=.5,
        aspect_ratio=2,
        axis_text_x=element_text(rotation=75, hjust=1),
    )
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188933654706)>
Tiempo en grupo monto atipico / no atipico por Proveedor
# Descriptive statistics of the delay between approval date and transaction
# date, per provider and month, within the outlier / non-outlier amount groups
# (first 5 rows shown).
stats = (
    df
    .groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month', 'outlier_label'])['delta_Fecha_aprobación_transacción']
    .describe()
    .reset_index()
)
stats.head(5)
| Proveedor | Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2022-1 | no outlier | 4 | 3 days 23:43:18.438250 | 2 days 16:23:16.663693644 | 1 days 15:02:04.650000 | 2 days 15:34:06.047250 | 3 days 05:55:43.390000 | 4 days 14:04:55.781000 | 7 days 19:59:42.323000 |
| 1 | 12636468-7 | 2022-2 | no outlier | 1 | 34 days 16:50:17.367000 | NaT | 34 days 16:50:17.367000 | 34 days 16:50:17.367000 | 34 days 16:50:17.367000 | 34 days 16:50:17.367000 | 34 days 16:50:17.367000 |
| 2 | 14324088-6 | 2022-2 | no outlier | 1 | 21 days 16:50:17.250000 | NaT | 21 days 16:50:17.250000 | 21 days 16:50:17.250000 | 21 days 16:50:17.250000 | 21 days 16:50:17.250000 | 21 days 16:50:17.250000 |
| 3 | 24100 | 2022-1 | no outlier | 1 | 7 days 21:25:58.807000 | NaT | 7 days 21:25:58.807000 | 7 days 21:25:58.807000 | 7 days 21:25:58.807000 | 7 days 21:25:58.807000 | 7 days 21:25:58.807000 |
| 4 | 2DO JUZGADO POLICIA LOCAL ( ABONAR A CLAUDIO O... | 2022-1 | no outlier | 1 | 36 days 21:26:56.093000 | NaT | 36 days 21:26:56.093000 | 36 days 21:26:56.093000 | 36 days 21:26:56.093000 | 36 days 21:26:56.093000 | 36 days 21:26:56.093000 |
# Providers with the largest median delay between approval date and transaction
# date, per month, within the outlier / non-outlier amount groups
# (first 5 rows shown).
# Same statistics as before, but on the delay expressed as whole days.
stats = (
    df
    .groupby(['Proveedor ', 'Fecha_de_la_transacción_year_month', 'outlier_label'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
# Within each month, order rows by median ('50%') descending, renumber the rows
# 0..len(stats)-1, then keep the first 10 rows of each (month, outlier_label) group.
stats_top = (
    stats
    .groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda month_rows: month_rows.sort_values(['50%'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
stats_top.head(5)
| Proveedor | Fecha_de_la_transacción_year_month | outlier_label | count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Index | |||||||||||
| 0 | Ruta La Araucanía S.A. | 2022-1 | no outlier | 1.000000000 | 208.000000000 | NaN | 208.000000000 | 208.000000000 | 208.000000000 | 208.000000000 | 208.000000000 |
| 1 | China King | 2022-1 | no outlier | 1.000000000 | 207.000000000 | NaN | 207.000000000 | 207.000000000 | 207.000000000 | 207.000000000 | 207.000000000 |
| 2 | Claudio DOugnac | 2022-1 | outlier | 1.000000000 | 184.000000000 | NaN | 184.000000000 | 184.000000000 | 184.000000000 | 184.000000000 | 184.000000000 |
| 3 | CENCOSUD EASY | 2022-1 | no outlier | 1.000000000 | 140.000000000 | NaN | 140.000000000 | 140.000000000 | 140.000000000 | 140.000000000 | 140.000000000 |
| 4 | Eco kleen car wash | 2022-1 | no outlier | 1.000000000 | 117.000000000 | NaN | 117.000000000 | 117.000000000 | 117.000000000 | 117.000000000 | 117.000000000 |
# Top providers by median delay between approval date and transaction date,
# per month, within the outlier / non-outlier amount groups.
# One label per row of stats_top: "<provider> - <year-month> - <outlier label>".
categoria_label = (
    stats_top['Proveedor ']
    + ' - ' + stats_top['Fecha_de_la_transacción_year_month']
    + ' - ' + stats_top['outlier_label']
)
# The category order is the reverse of the row order of stats_top
# (presumably so the first row ends up on top after coord_flip -- confirm).
categoria_list = list(reversed(categoria_label.tolist()))
categoria_cat = pd.Categorical(categoria_label, categories=categoria_list)
stats_top_aux = stats_top.assign(categoria_cat=categoria_cat)
# One facet per (month, outlier_label) pair, stacked in a single column.
(
    ggplot(stats_top_aux, aes(y='50%', x='categoria_cat'))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap(
        ['Fecha_de_la_transacción_year_month', 'outlier_label'],
        scales='free',
        ncol=1,
    )
    + coord_flip()
    + theme_bw()
    + theme(legend_position='none')
    + theme(panel_spacing_y=1, aspect_ratio=0.4)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
c:\Python310\lib\site-packages\plotnine\facets\facet.py:396: PlotnineWarning: If you need more space for the y-axis tick text use ... + theme(subplots_adjust={'hspace': 0.25}). Choose an appropriate value for 'hspace'
<ggplot: (188938644769)>
# Providers that show up in the top 10 of every month, per outlier /
# non-outlier amount group.
# Within each month, order rows by median ('50%') descending, renumber the rows
# 0..len(stats)-1, then keep the first 10 rows of each (month, outlier_label) group.
stats_top = (
    stats
    .groupby('Fecha_de_la_transacción_year_month')
    .apply(lambda month_rows: month_rows.sort_values(['50%'], ascending=False))
    .assign(Index=range(len(stats)))
    .set_index('Index')
    .groupby(['Fecha_de_la_transacción_year_month', 'outlier_label'])
    .head(10)
)
n_month = len(stats_top['Fecha_de_la_transacción_year_month'].unique())
# Number of top-10 rows per (provider, outlier_label) pair, largest first.
apariciones = (
    stats_top
    .groupby(['Proveedor ', 'outlier_label'])['Proveedor ']
    .count()
    .sort_values(ascending=False)
)
# Keep only the pairs whose row count equals the number of months.
categoria_count = apariciones[apariciones == n_month].index.to_list()
# NOTE: df_aux is a second name for df (no copy is made), so the
# 'list_of_tuples' column below is written onto df itself as well.
df_aux = df
df_aux['list_of_tuples'] = list(
    zip(df_aux['Proveedor '].to_list(), df_aux['outlier_label'].to_list())
)
# Rows of df whose (provider, outlier_label) pair is one of the selected pairs.
df_top_categorias = df_aux[df_aux['list_of_tuples'].isin(categoria_count)]
# Providers that appear month after month, per outlier / non-outlier amount
# group: descriptive statistics of the approval delay (in days) for each
# (provider, outlier_label) pair, flattened back into regular columns.
stats = (
    df_top_categorias
    .groupby(['Proveedor ', 'outlier_label'])['delta_Fecha_aprobación_transacción_day_int']
    .describe()
    .reset_index()
)
# Horizontal bars of the median ('50%'), one facet per outlier_label.
(
    ggplot(stats, aes(y='50%', x='Proveedor '))
    + geom_bar(stat='sum', fill='#0C475B')
    + xlab('')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=1, scales='free')
    + coord_flip()
    + theme_bw()
    + guides(size=False)
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188939410429)>
# Record volume and median approval delay for the providers that appear month
# after month, per outlier / non-outlier amount group.
# Rename the describe() columns that are plotted to their display names.
stats = stats.rename(columns={'50%': 'Dias', 'count': 'N° Registros'})
# Point size follows the record count; colour follows the median delay.
# The y axis maps the 'Proveedor ' column, so it is labelled 'Proveedor'
# (it previously carried the label 'Sucursal', which does not match the data).
(
    ggplot(stats, aes(x='N° Registros', y='Proveedor ', color='Dias'))
    + geom_point(aes(size='N° Registros'))
    + xlab('N° registros')
    + ylab('Proveedor')
    + facet_wrap('outlier_label', ncol=2)
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188933659832)>
# Boxplot of the records belonging to the providers that appear in the top
# month after month, per group above / below the limit.
# The x aesthetic maps the 'Proveedor ' column, so its label is 'Proveedor'
# (it previously carried the label 'Sucursal', which does not match the data).
(
    ggplot(
        df_top_categorias,
        aes(x='Proveedor ', y='delta_Fecha_aprobación_transacción_day_int'),
    )
    + geom_boxplot(colour="#0C475B", fill="#658CB9")
    + xlab('Proveedor')
    + ylab('Tiempo (dias)')
    + facet_wrap('outlier_label', ncol=2)
    + coord_flip()
    + theme_bw()
)
c:\Python310\lib\site-packages\plotnine\utils.py:371: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
<ggplot: (188945593477)>